; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; and(%y, select(%c, 0, -1)): the select yields an all-zeros or all-ones
; mask. The checks below expect one v_cndmask_b32 with a 0 operand whose
; result register is the stored value, and no v_and_b32 before the store.
; GCN-LABEL: {{^}}select_and1:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = and i32 %y, %s
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; Same 0 / -1 mask as an i32 and, but with the select as the first operand
; of the and (%s, %y). The checks below expect one v_cndmask_b32 with a 0
; operand feeding the store and no v_and_b32.
; GCN-LABEL: {{^}}select_and2:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = and i32 %s, %y
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; and(%y, select(%c, -1, 0)): the select arms are swapped (-1 when %c is
; true, 0 otherwise). The checks below still expect one v_cndmask_b32 with
; a 0 operand feeding the store and no v_and_b32.
; GCN-LABEL: {{^}}select_and3:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_and3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 -1, i32 0
  %a = and i32 %y, %s
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; <4 x i32> variant: a scalar i1 condition selects between an all-zeros and
; an all-ones vector, which is then and-ed with %y. The checks below expect
; four v_cndmask_b32 with a 0 operand and no v_and_b32 before the store.
; GCN-LABEL: {{^}}select_and_v4:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword
define amdgpu_kernel void @select_and_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %s, %y
  store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
  ret void
}

; or(%y, select(%c, 0, -1)): the select yields an all-zeros or all-ones
; mask. The checks below expect one v_cndmask_b32 with a -1 operand whose
; result register is the stored value, and no v_or_b32 before the store.
; GCN-LABEL: {{^}}select_or1:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = or i32 %y, %s
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; Same 0 / -1 mask as an i32 or, but with the select as the first operand
; of the or (%s, %y). The checks below expect one v_cndmask_b32 with a -1
; operand feeding the store and no v_or_b32.
; GCN-LABEL: {{^}}select_or2:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = or i32 %s, %y
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; or(%y, select(%c, -1, 0)): the select arms are swapped (-1 when %c is
; true, 0 otherwise). The checks below still expect one v_cndmask_b32 with
; a -1 operand feeding the store and no v_or_b32.
; GCN-LABEL: {{^}}select_or3:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_or3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 -1, i32 0
  %a = or i32 %y, %s
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; <4 x i32> variant: a scalar i1 condition selects between an all-zeros and
; an all-ones vector, which is then or-ed with %y. The checks below expect
; four v_cndmask_b32 with a -1 operand and no v_or_b32 before the store.
; GCN-LABEL: {{^}}select_or_v4:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword
define amdgpu_kernel void @select_or_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %s, %y
  store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
  ret void
}

; sub of the constant 5 and a select of the constants -4 / 3. The check
; below expects the sub to be folded into the select arms, leaving one
; v_cndmask_b32 over the constants 2 (5 - 3) and 9 (5 - (-4)).
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i32 -4, i32 3
  %bo = sub i32 5, %sel
  store i32 %bo, i32 addrspace(1)* %p, align 4
  ret void
}

; i16 version of the sub-of-select fold: 5 - (-4) = 9 and 5 - 3 = 2. The
; checks below expect the 9 arm to be materialized with v_mov_b32 as the
; literal 0xffff0009 and the 2 arm to be used directly in the v_cndmask_b32.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
; TODO: shrink i16 constant. This is correct but suboptimal.
; GCN: v_mov_b32_e32 [[T:v[0-9]+]], 0xffff0009
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[T]],
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(i16 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i16 -4, i16 3
  %bo = sub i16 5, %sel
  store i16 %bo, i16 addrspace(1)* %p, align 2
  ret void
}

; i16 sub-of-select fold where both folded arms are negative:
; 1 - 4 = -3 and 1 - 3000 = -2999 (0xfffff449 as a 32-bit literal). The
; checks below expect -2999 in a v_mov_b32 and -3 as a direct operand.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16_neg:
; GCN: v_mov_b32_e32 [[F:v[0-9]+]], 0xfffff449
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, [[F]], -3,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16_neg(i16 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i16 4, i16 3000
  %bo = sub i16 1, %sel
  store i16 %bo, i16 addrspace(1)* %p, align 2
  ret void
}

; <2 x i16> sub-of-select fold. Folded arms: <5 - (-4), 7 - 2> = <9, 5>
; (packed 0x50009) and <5 - 3, 7 - 1> = <2, 6> (packed 0x60002). The checks
; below expect both packed literals in v_mov_b32 and one v_cndmask_b32.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v2i16:
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0x60002
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x50009
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(<2 x i16> addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, <2 x i16> <i16 -4, i16 2>, <2 x i16> <i16 3, i16 1>
  %bo = sub <2 x i16> <i16 5, i16 7>, %sel
  store <2 x i16> %bo, <2 x i16> addrspace(1)* %p, align 4
  ret void
}

; <4 x i32> sub-of-select fold. Per-lane folded arms are
; <9, 5, 6, 7> (from <5,7,9,11> - <-4,2,3,4>) and
; <2, 6, 10, 14> (from <5,7,9,11> - <3,1,-1,-3>). The checks below expect
; one v_cndmask_b32 per lane over those constant pairs, in any order.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32:
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 6, 5,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 10, 6,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 14, 7,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i32> addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
  %bo = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %sel
  store <4 x i32> %bo, <4 x i32> addrspace(1)* %p, align 32
  ret void
}

; i64 sdiv of the constant 120 by a select of the constants 121 / 23. The
; check below expects the division to be folded into the select arms,
; leaving a v_cndmask_b32 over 5 (120 / 23) and 0 (120 / 121).
; GCN-LABEL: {{^}}sdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 5, 0,
define amdgpu_kernel void @sdiv_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i64 121, i64 23
  %bo = sdiv i64 120, %sel
  store i64 %bo, i64 addrspace(1)* %p, align 8
  ret void
}

; i64 udiv of the constant 120 by a select of the constants -4 / 23. As an
; unsigned divisor -4 is larger than 120, so that arm folds to 0; the other
; folds to 5 (120 / 23). The check below expects a v_cndmask_b32 over 5, 0.
; GCN-LABEL: {{^}}udiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 5, 0,
define amdgpu_kernel void @udiv_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i64 -4, i64 23
  %bo = udiv i64 120, %sel
  store i64 %bo, i64 addrspace(1)* %p, align 8
  ret void
}

; i64 srem of the constant 33 by a select of the constants 34 / 15. The
; check below expects the remainder to be folded into the select arms,
; leaving a v_cndmask_b32 over 3 (33 rem 15) and 33 (33 rem 34).
; GCN-LABEL: {{^}}srem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 3, 33,
define amdgpu_kernel void @srem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i64 34, i64 15
  %bo = srem i64 33, %sel
  store i64 %bo, i64 addrspace(1)* %p, align 8
  ret void
}

; i64 urem of the constant 33 by a select of the constants 34 / 15. The
; check below expects the remainder to be folded into the select arms,
; leaving a v_cndmask_b32 over 3 (33 rem 15) and 33 (33 rem 34).
; GCN-LABEL: {{^}}urem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 3, 33,
define amdgpu_kernel void @urem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i64 34, i64 15
  %bo = urem i64 33, %sel
  store i64 %bo, i64 addrspace(1)* %p, align 8
  ret void
}

; shl of the constant 1 by a select of the shift amounts 2 / 3. The check
; below expects the shift to be folded into the select arms, leaving a
; v_cndmask_b32 over 8 (1 << 3) and 4 (1 << 2).
; GCN-LABEL: {{^}}shl_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 8, 4,
define amdgpu_kernel void @shl_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i32 2, i32 3
  %bo = shl i32 1, %sel
  store i32 %bo, i32 addrspace(1)* %p, align 4
  ret void
}

; lshr of the constant 64 by a select of the shift amounts 2 / 3. The check
; below expects the shift to be folded into the select arms, leaving a
; v_cndmask_b32 over 8 (64 >> 3) and 16 (64 >> 2).
; GCN-LABEL: {{^}}lshr_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 8, 16,
define amdgpu_kernel void @lshr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i32 2, i32 3
  %bo = lshr i32 64, %sel
  store i32 %bo, i32 addrspace(1)* %p, align 4
  ret void
}

; ashr of the constant 128 by a select of the shift amounts 2 / 3. The
; check below expects the shift to be folded into the select arms, leaving
; a v_cndmask_b32 over 16 (128 >> 3) and 32 (128 >> 2).
; GCN-LABEL: {{^}}ashr_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 16, 32,
define amdgpu_kernel void @ashr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i32 2, i32 3
  %bo = ashr i32 128, %sel
  store i32 %bo, i32 addrspace(1)* %p, align 4
  ret void
}

; fsub of the constant -1.0 and a select of the constants -2.0 / 3.0. The
; check below expects the subtraction to be folded into the select arms,
; leaving a v_cndmask_b32 over -4.0 (-1 - 3) and 1.0 (-1 - (-2)).
; GCN-LABEL: {{^}}fsub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, -4.0, 1.0,
define amdgpu_kernel void @fsub_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, float -2.0, float 3.0
  %bo = fsub float -1.0, %sel
  store float %bo, float addrspace(1)* %p, align 4
  ret void
}

; half version of the fsub-of-select fold: -1 - (-2) = 1.0 (0x3c00 as f16)
; and -1 - 3 = -4.0 (0xc400 as f16). The checks below expect both values to
; be materialized with v_mov_b32 and then selected by one v_cndmask_b32.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_f16:
; TODO: it shall be possible to fold constants with OpSel
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x3c00
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0xc400
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
define amdgpu_kernel void @fsub_constant_sel_constants_f16(half addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, half -2.0, half 3.0
  %bo = fsub half -1.0, %sel
  store half %bo, half addrspace(1)* %p, align 2
  ret void
}

; <2 x half> fsub-of-select fold. Folded arms: <-1 - (-2), 2 - (-3)> =
; <1.0, 5.0> (packed 0x45003c00) and <-1 - (-1), 2 - 4> = <0.0, -2.0>
; (packed 0xc0000000, the same bit pattern as the 32-bit float -2.0). The
; checks below expect one v_mov_b32 literal and a -2.0 operand.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v2f16:
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x45003c00
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, -2.0, [[T]],
define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(<2 x half> addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, <2 x half> <half -2.0, half -3.0>, <2 x half> <half -1.0, half 4.0>
  %bo = fsub <2 x half> <half -1.0, half 2.0>, %sel
  store <2 x half> %bo, <2 x half> addrspace(1)* %p, align 4
  ret void
}

; <4 x float> fsub-of-select fold. Per-lane folded arms are
; <1.0, 5.0, 9.0, 13.0> and <0.0, 2.0, 4.0, 6.0>. The checks below expect
; 5.0 (0x40a00000), 9.0 (0x41100000), 13.0 (0x41500000) and 6.0
; (0x40c00000) to be materialized with v_mov_b32, the remaining values to
; appear directly as operands, and one v_cndmask_b32 per lane.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32:
; GCN-DAG: v_mov_b32_e32 [[T2:v[0-9]+]], 0x40a00000
; GCN-DAG: v_mov_b32_e32 [[T3:v[0-9]+]], 0x41100000
; GCN-DAG: v_mov_b32_e32 [[T4:v[0-9]+]], 0x41500000
; GCN-DAG: v_mov_b32_e32 [[F4:v[0-9]+]], 0x40c00000
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0,
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, [[T2]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[T3]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[F4]], [[T4]],
define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(<4 x float> addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
  %bo = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %sel
  store <4 x float> %bo, <4 x float> addrspace(1)* %p, align 32
  ret void
}

; fdiv of the constant 8.0 by a select of the constants -4.0 / 2.0. The
; check below expects the division to be folded into the select arms,
; leaving a v_cndmask_b32 over 4.0 (8 / 2) and -2.0 (8 / -4).
; GCN-LABEL: {{^}}fdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 4.0, -2.0,
define amdgpu_kernel void @fdiv_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, float -4.0, float 2.0
  %bo = fdiv float 8.0, %sel
  store float %bo, float addrspace(1)* %p, align 4
  ret void
}

; frem of the constant 5.0 by a select of the constants -4.0 / 3.0. The
; check below expects the remainder to be folded into the select arms,
; leaving a v_cndmask_b32 over 2.0 (5 rem 3) and 1.0 (5 rem -4).
; GCN-LABEL: {{^}}frem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0,
define amdgpu_kernel void @frem_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, float -4.0, float 3.0
  %bo = frem float 5.0, %sel
  store float %bo, float addrspace(1)* %p, align 4
  ret void
}