; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Signed divide of two i8 values loaded from consecutive elements of %in.
; The checks expect the float-based expansion on both targets: two
; int-to-float conversions, a reciprocal, and a float-to-int conversion.
; FUNC-LABEL: {{^}}sdiv24_i8:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1)* %in
  %den = load i8, i8 addrspace(1)* %den_ptr
  %result = sdiv i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; Signed divide of two i16 values loaded from consecutive elements of %in.
; The checks expect the float-based expansion on both targets: two
; int-to-float conversions, a reciprocal, and a float-to-int conversion.
; FUNC-LABEL: {{^}}sdiv24_i16:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1)* %in, align 2
  %den = load i16, i16 addrspace(1)* %den_ptr, align 2
  %result = sdiv i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; Signed divide of two i32 values that are each sign-extended from their low
; 24 bits (shl 8 followed by ashr 8). The checks expect the float-based
; expansion on both targets: two int-to-float conversions, a reciprocal, and
; a float-to-int conversion.
; FUNC-LABEL: {{^}}sdiv24_i32:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  ; shl 8 + ashr 8 keeps the low 24 bits and replicates bit 23 upward.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = sdiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: both operands are sign-extended from their low 25 bits
; (shl 7 followed by ashr 7). The checks require that neither the
; int-to-float conversion nor the reciprocal appears for this kernel.
; FUNC-LABEL: {{^}}sdiv25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  ; shl 7 + ashr 7 keeps the low 25 bits and replicates bit 24 upward.
  %num.i25.0 = shl i32 %num, 7
  %den.i25.0 = shl i32 %den, 7
  %num.i25 = ashr i32 %num.i25.0, 7
  %den.i25 = ashr i32 %den.i25.0, 7
  %result = sdiv i32 %num.i25, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: the numerator is sign-extended from 24 bits (shl/ashr 8) but
; the denominator from 25 bits (shl/ashr 7). The checks require that neither
; the int-to-float conversion nor the reciprocal appears for this kernel.
; FUNC-LABEL: {{^}}test_no_sdiv24_i32_1:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i25 = ashr i32 %den.i25.0, 7
  %result = sdiv i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: the numerator is sign-extended from 25 bits (shl/ashr 7) and
; the denominator from 24 bits (shl/ashr 8). The checks require that neither
; the int-to-float conversion nor the reciprocal appears for this kernel.
; FUNC-LABEL: {{^}}test_no_sdiv24_i32_2:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = ashr i32 %num.i25.0, 7
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = sdiv i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed remainder of two i8 values loaded from consecutive elements of %in.
; The checks expect the float-based expansion on both targets: two
; int-to-float conversions, a reciprocal, and a float-to-int conversion.
; FUNC-LABEL: {{^}}srem24_i8:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1)* %in
  %den = load i8, i8 addrspace(1)* %den_ptr
  %result = srem i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; Signed remainder of two i16 values loaded from consecutive elements of %in.
; The checks expect the float-based expansion on both targets: two
; int-to-float conversions, a reciprocal, and a float-to-int conversion.
; FUNC-LABEL: {{^}}srem24_i16:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1)* %in, align 2
  %den = load i16, i16 addrspace(1)* %den_ptr, align 2
  %result = srem i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; Signed remainder of two i32 values that are each sign-extended from their
; low 24 bits (shl 8 followed by ashr 8). The checks expect the float-based
; expansion on both targets: two int-to-float conversions, a reciprocal, and
; a float-to-int conversion.
; FUNC-LABEL: {{^}}srem24_i32:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  ; shl 8 + ashr 8 keeps the low 24 bits and replicates bit 23 upward.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = srem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: both operands of the signed remainder are sign-extended from
; their low 25 bits (shl 7 followed by ashr 7). The checks require that
; neither the int-to-float conversion nor the reciprocal appears.
; FUNC-LABEL: {{^}}no_srem25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  ; shl 7 + ashr 7 keeps the low 25 bits and replicates bit 24 upward.
  %num.i25.0 = shl i32 %num, 7
  %den.i25.0 = shl i32 %den, 7
  %num.i25 = ashr i32 %num.i25.0, 7
  %den.i25 = ashr i32 %den.i25.0, 7
  %result = srem i32 %num.i25, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: signed divide of a 24-bit sign-extended numerator
; (shl/ashr 8) by a 25-bit sign-extended denominator (shl/ashr 7). The checks
; require that neither the int-to-float conversion nor the reciprocal appears.
; FUNC-LABEL: {{^}}no_sdiv25_i24_i25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_sdiv25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i25 = ashr i32 %den.i25.0, 7
  %result = sdiv i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: signed divide of a 25-bit sign-extended numerator
; (shl/ashr 7) by a 24-bit sign-extended denominator (shl/ashr 8). The checks
; require that neither the int-to-float conversion nor the reciprocal appears.
; FUNC-LABEL: {{^}}no_sdiv25_i25_i24_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_sdiv25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = ashr i32 %num.i25.0, 7
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = sdiv i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: signed remainder of a 24-bit sign-extended numerator
; (shl/ashr 8) by a 25-bit sign-extended denominator (shl/ashr 7). The checks
; require that neither the int-to-float conversion nor the reciprocal appears.
; FUNC-LABEL: {{^}}no_srem25_i24_i25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_srem25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i25 = ashr i32 %den.i25.0, 7
  %result = srem i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: signed remainder of a 25-bit sign-extended numerator
; (shl/ashr 7) by a 24-bit sign-extended denominator (shl/ashr 8). The checks
; require that neither the int-to-float conversion nor the reciprocal appears.
; FUNC-LABEL: {{^}}no_srem25_i25_i24_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_srem25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = ashr i32 %num.i25.0, 7
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = srem i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed remainder of a 24-bit sign-extended numerator (shl/ashr 8) by an
; 11-bit sign-extended denominator (shl/ashr 21). The checks expect the
; float-based expansion, and on amdgcn a v_bfe_i32 whose trailing operands
; are 0, 24 after the reciprocal.
; FUNC-LABEL: {{^}}srem25_i24_i11_i32:
; SI: v_cvt_f32_i32
; SI: v_rcp_f32
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 24

; EG: INT_TO_FLT
; EG: RECIP_IEEE
define amdgpu_kernel void @srem25_i24_i11_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i11.0 = shl i32 %den, 21
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i11 = ashr i32 %den.i11.0, 21
  %result = srem i32 %num.i24, %den.i11
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed remainder of an 11-bit sign-extended numerator (shl/ashr 21) by a
; 24-bit sign-extended denominator (shl/ashr 8). The checks expect the
; float-based expansion, and on amdgcn a v_bfe_i32 whose trailing operands
; are 0, 24 after the reciprocal.
; FUNC-LABEL: {{^}}srem25_i11_i24_i32:
; SI: v_cvt_f32_i32
; SI: v_rcp_f32
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 24

; EG: INT_TO_FLT
; EG: RECIP_IEEE
define amdgpu_kernel void @srem25_i11_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i11.0 = shl i32 %num, 21
  %den.i24.0 = shl i32 %den, 8
  %num.i11 = ashr i32 %num.i11.0, 21
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = srem i32 %num.i11, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed remainder of a 17-bit sign-extended numerator (shl/ashr 15) by a
; 12-bit sign-extended denominator (shl/ashr 20). The checks expect the
; float-based expansion, and on amdgcn a v_bfe_i32 whose trailing operands
; are 0, 17 after the reciprocal.
; FUNC-LABEL: {{^}}srem25_i17_i12_i32:
; SI: v_cvt_f32_i32
; SI: v_rcp_f32
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 17

; EG: INT_TO_FLT
; EG: RECIP_IEEE
define amdgpu_kernel void @srem25_i17_i12_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is element 0 of %in, denominator is element 1.
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i17.0 = shl i32 %num, 15
  %den.i12.0 = shl i32 %den, 20
  %num.i17 = ashr i32 %num.i17.0, 15
  %den.i12 = ashr i32 %den.i12.0, 20
  ; Uses srem, as the kernel name and the neighbouring srem25_* tests indicate.
  ; NOTE(review): confirm the v_bfe_i32 width check above still matches for
  ; the remainder path on the targeted llc revision.
  %result = srem i32 %num.i17, %den.i12
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}