; Source blob 6f144dcc6fd218ae0fdec440c6be33b84107b351
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Unsigned division of two i8 values loaded from consecutive bytes of %in.
; Both targets are expected to convert the operands to float, take a float
; reciprocal, and convert the quotient back to an unsigned integer.
; FUNC-LABEL: {{^}}udiv24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define amdgpu_kernel void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1) * %in
  %den = load i8, i8 addrspace(1) * %den_ptr
  %result = udiv i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; Unsigned division of two i16 values loaded from consecutive elements of %in.
; Both targets are expected to convert the operands to float, take a float
; reciprocal, and convert the quotient back to an unsigned integer.
; FUNC-LABEL: {{^}}udiv24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define amdgpu_kernel void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1) * %in, align 2
  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
  %result = udiv i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; i32 unsigned division where both operands are first reduced to their low
; 23 bits (shl 9 followed by lshr 9). The checks expect the float reciprocal
; sequence on both targets.
; FUNC-LABEL: {{^}}udiv23_i32:
; SI: v_cvt_f32_u32
; SI-DAG: v_cvt_f32_u32
; SI-DAG: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define amdgpu_kernel void @udiv23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Clear the top 9 bits of each operand, leaving 23 significant bits.
  %num.i23.0 = shl i32 %num, 9
  %den.i23.0 = shl i32 %den, 9
  %num.i23 = lshr i32 %num.i23.0, 9
  %den.i23 = lshr i32 %den.i23.0, 9
  %result = udiv i32 %num.i23, %den.i23
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned division where both operands are reduced to their low 24 bits
; (shl 8 followed by lshr 8). The checks expect v_rcp_iflag and forbid
; v_rcp_f32 / RECIP_IEEE, i.e. the float reciprocal sequence must not be used.
; FUNC-LABEL: {{^}}udiv24_i32:
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Clear the top 8 bits of each operand, leaving 24 significant bits.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned division with a 23-bit numerator (shl/lshr 9) and a 24-bit
; denominator (shl/lshr 8). The checks expect v_rcp_iflag and forbid
; v_rcp_f32 / RECIP_IEEE, i.e. the float reciprocal sequence must not be used.
; FUNC-LABEL: {{^}}no_udiv24_u23_u24_i32:
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_udiv24_u23_u24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator keeps 23 bits, denominator keeps 24 bits.
  %num.i23.0 = shl i32 %num, 9
  %den.i24.0 = shl i32 %den, 8
  %num.i23 = lshr i32 %num.i23.0, 9
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i23, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned division with a 24-bit numerator (shl/lshr 8) and a 23-bit
; denominator (shl/lshr 9). The checks expect v_rcp_iflag and forbid
; v_rcp_f32 / RECIP_IEEE, i.e. the float reciprocal sequence must not be used.
; FUNC-LABEL: {{^}}no_udiv24_u24_u23_i32:
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_udiv24_u24_u23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator keeps 24 bits, denominator keeps 23 bits.
  %num.i24.0 = shl i32 %num, 8
  %den.i23.0 = shl i32 %den, 9
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i23 = lshr i32 %den.i23.0, 9
  %result = udiv i32 %num.i24, %den.i23
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned division where both operands are reduced to their low 25 bits
; (shl 7 followed by lshr 7). The float reciprocal sequence must not appear.
; FUNC-LABEL: {{^}}udiv25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Clear the top 7 bits of each operand, leaving 25 significant bits.
  %num.i25.0 = shl i32 %num, 7
  %den.i25.0 = shl i32 %den, 7
  %num.i25 = lshr i32 %num.i25.0, 7
  %den.i25 = lshr i32 %den.i25.0, 7
  %result = udiv i32 %num.i25, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned division with a 24-bit numerator (shl/lshr 8) and a 25-bit
; denominator (shl/lshr 7). The float reciprocal sequence must not appear.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator keeps 24 bits, denominator keeps 25 bits.
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i25 = lshr i32 %den.i25.0, 7
  %result = udiv i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned division with a 25-bit numerator (shl/lshr 7) and a 24-bit
; denominator (shl/lshr 8). The float reciprocal sequence must not appear.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator keeps 25 bits, denominator keeps 24 bits.
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = lshr i32 %num.i25.0, 7
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned remainder of two i8 values loaded from consecutive bytes of %in.
; Both targets are expected to convert the operands to float, take a float
; reciprocal, and convert back to an unsigned integer.
; FUNC-LABEL: {{^}}urem24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define amdgpu_kernel void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1) * %in
  %den = load i8, i8 addrspace(1) * %den_ptr
  %result = urem i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; Unsigned remainder of two i16 values loaded from consecutive elements of
; %in. Both targets are expected to convert the operands to float, take a
; float reciprocal, and convert back to an unsigned integer.
; FUNC-LABEL: {{^}}urem24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define amdgpu_kernel void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1) * %in, align 2
  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
  %result = urem i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; i32 unsigned remainder where both operands are reduced to their low 24 bits
; (shl 8 followed by lshr 8). The checks forbid v_rcp_f32 / RECIP_IEEE, i.e.
; the float reciprocal sequence must not be used.
; FUNC-LABEL: {{^}}urem24_i32:
; SI-NOT: v_rcp_f32
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Clear the top 8 bits of each operand, leaving 24 significant bits.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned remainder where both operands are reduced to their low 25 bits
; (shl 7 followed by lshr 7). The float reciprocal sequence must not appear.
; FUNC-LABEL: {{^}}urem25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Clear the top 7 bits of each operand, leaving 25 significant bits.
  %num.i25.0 = shl i32 %num, 7
  %den.i25.0 = shl i32 %den, 7
  %num.i25 = lshr i32 %num.i25.0, 7
  %den.i25 = lshr i32 %den.i25.0, 7
  %result = urem i32 %num.i25, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned remainder with a 24-bit numerator (shl/lshr 8) and a 25-bit
; denominator (shl/lshr 7). The float reciprocal sequence must not appear.
; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator keeps 24 bits, denominator keeps 25 bits.
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i25 = lshr i32 %den.i25.0, 7
  %result = urem i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned remainder with a 25-bit numerator (shl/lshr 7) and a 24-bit
; denominator (shl/lshr 8). The float reciprocal sequence must not appear.
; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator keeps 25 bits, denominator keeps 24 bits.
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = lshr i32 %num.i25.0, 7
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = urem i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned division with a 16-bit numerator (shl/lshr 16) and a 23-bit
; denominator (shl/lshr 9). The checks expect the float reciprocal
; (v_rcp_f32 / RECIP_IEEE) plus, on the amdgcn runs, a 0x7fffff mask held in a
; scalar register and applied with v_and_b32.
; FUNC-LABEL: {{^}}test_udiv24_u16_u23_i32:
; SI-DAG: v_rcp_f32
; SI-DAG: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff{{$}}
; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],

; EG: RECIP_IEEE
define amdgpu_kernel void @test_udiv24_u16_u23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator keeps 16 bits, denominator keeps 23 bits.
  %num.i16.0 = shl i32 %num, 16
  %den.i23.0 = shl i32 %den, 9
  %num.i16 = lshr i32 %num.i16.0, 16
  %den.i23 = lshr i32 %den.i23.0, 9
  %result = udiv i32 %num.i16, %den.i23
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 unsigned division with a 23-bit numerator (shl/lshr 9) and a 16-bit
; denominator (shl/lshr 16). The checks expect the float reciprocal
; (v_rcp_f32 / RECIP_IEEE) plus, on the amdgcn runs, a 0x7fffff mask held in a
; scalar register and applied with v_and_b32.
; FUNC-LABEL: {{^}}test_udiv24_u23_u16_i32:
; SI-DAG: v_rcp_f32
; SI-DAG: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff{{$}}
; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],

; EG: RECIP_IEEE
define amdgpu_kernel void @test_udiv24_u23_u16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is in[0], denominator is in[1].
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in, align 4
  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
  ; Numerator keeps 23 bits, denominator keeps 16 bits.
  %num.i23.0 = shl i32 %num, 9
  %den.i16.0 = shl i32 %den, 16
  %num.i23 = lshr i32 %num.i23.0, 9
  %den.i16 = lshr i32 %den.i16.0, 16
  %result = udiv i32 %num.i23, %den.i16
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}