; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; XUN: llc -march=amdgcn -mcpu=tonga -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; FIXME: This leaves behind a now-unnecessary 'and' with exec.

; Triangle: %entry branches either to %if (which computes %v + %v) or straight
; to %endif. The checks expect both values to be computed and selected with
; v_cndmask_b32, using the inverted (neq) form of the oeq compare.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_cmp_neq_f32_e32 vcc, 1.0, [[VAL]]
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], [[VAL]], [[VAL]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], [[ADD]], [[VAL]], vcc
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_vccnz_ifcvt_triangle(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %v = load float, float addrspace(1)* %in
  %cc = fcmp oeq float %v, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  %u = fadd float %v, %v
  br label %endif

endif:
  %r = phi float [ %v, %entry ], [ %u, %if ]
  store float %r, float addrspace(1)* %out
  ret void
}

; Diamond: %if computes %v + %v, %else computes %v * %v, and both join in
; %endif. The checks expect the add and the mul (in either order) followed by
; a single v_cndmask_b32 that feeds the store.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_diamond:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_cmp_neq_f32_e32 vcc, 1.0, [[VAL]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[VAL]], [[VAL]]
; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VAL]], [[VAL]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], [[ADD]], [[MUL]], vcc
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_vccnz_ifcvt_diamond(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %v = load float, float addrspace(1)* %in
  %cc = fcmp oeq float %v, 1.000000e+00
  br i1 %cc, label %if, label %else

if:
  %u0 = fadd float %v, %v
  br label %endif

else:
  %u1 = fmul float %v, %v
  br label %endif

endif:
  %r = phi float [ %u0, %if ], [ %u1, %else ]
  store float %r, float addrspace(1)* %out
  ret void
}

; Triangle whose %if block contains inline asm that clobbers VCC. The checks
; expect the compare result to be produced in an SGPR pair, the add (which
; names vcc as an operand) to be emitted, and the compare result to be copied
; into vcc with s_mov_b64 before the v_cndmask_b32 that consumes it.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_vcc_clobber:
; GCN: ; clobber vcc
; GCN: v_cmp_neq_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc
; GCN: s_mov_b64 vcc, [[CMP]]
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_vcc_clobber(i32 addrspace(1)* %out, i32 addrspace(1)* %in, float %k) #0 {
entry:
  %v = load i32, i32 addrspace(1)* %in
  %cc = fcmp oeq float %k, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  call void asm "; clobber $0", "~{VCC}"() #0
  %u = add i32 %v, %v
  br label %endif

endif:
  %r = phi i32 [ %v, %entry ], [ %u, %if ]
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; Longest chain of cheap instructions to convert
; The %if block is a dependent chain of nine fmuls; the checks expect nine
; v_mul_f32 followed by a v_cndmask_b32.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_max_cheap:
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_cndmask_b32_e32
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_max_cheap(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %v = load float, float addrspace(1)* %in
  %cc = fcmp oeq float %v, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  %u.0 = fmul float %v, %v
  %u.1 = fmul float %v, %u.0
  %u.2 = fmul float %v, %u.1
  %u.3 = fmul float %v, %u.2
  %u.4 = fmul float %v, %u.3
  %u.5 = fmul float %v, %u.4
  %u.6 = fmul float %v, %u.5
  %u.7 = fmul float %v, %u.6
  %u.8 = fmul float %v, %u.7
  br label %endif

endif:
  %r = phi float [ %v, %entry ], [ %u.8, %if ]
  store float %r, float addrspace(1)* %out
  ret void
}

; Shortest chain of cheap instructions that is not converted
; The %if block is a dependent chain of ten fmuls, one more than in
; test_vccnz_ifcvt_triangle_max_cheap; the checks expect the branch over the
; multiplies to remain.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_min_expensive:
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]

; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_mul_f32

; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_min_expensive(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %v = load float, float addrspace(1)* %in
  %cc = fcmp oeq float %v, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  %u.0 = fmul float %v, %v
  %u.1 = fmul float %v, %u.0
  %u.2 = fmul float %v, %u.1
  %u.3 = fmul float %v, %u.2
  %u.4 = fmul float %v, %u.3
  %u.5 = fmul float %v, %u.4
  %u.6 = fmul float %v, %u.5
  %u.7 = fmul float %v, %u.6
  %u.8 = fmul float %v, %u.7
  %u.9 = fmul float %v, %u.8
  br label %endif

endif:
  %r = phi float [ %v, %entry ], [ %u.9, %if ]
  store float %r, float addrspace(1)* %out
  ret void
}

; Should still branch over fdiv expansion
; The %if block holds a single fdiv; the checks expect the compare, a
; s_cbranch_vccnz to the join block, and v_div_scale_f32 on the branched-over
; path.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_expensive:
; GCN: v_cmp_neq_f32_e32
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]

; GCN: v_div_scale_f32

; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_expensive(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %v = load float, float addrspace(1)* %in
  %cc = fcmp oeq float %v, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  %u = fdiv float %v, %v
  br label %endif

endif:
  %r = phi float [ %v, %entry ], [ %u, %if ]
  store float %r, float addrspace(1)* %out
  ret void
}

; vcc branch with SGPR inputs
; The condition is a float compare on the kernel argument %cnd while the
; selected values are i32s loaded through an addrspace(2) pointer. The checks
; expect the branch to stay, with a scalar s_add_i32 on the branched-over path.
; GCN-LABEL: {{^}}test_vccnz_sgpr_ifcvt_triangle:
; GCN: v_cmp_neq_f32_e64
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]

; GCN: s_add_i32

; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(i32 addrspace(1)* %out, i32 addrspace(2)* %in, float %cnd) #0 {
entry:
  %v = load i32, i32 addrspace(2)* %in
  %cc = fcmp oeq float %cnd, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  %u = add i32 %v, %v
  br label %endif

endif:
  %r = phi i32 [ %v, %entry ], [ %u, %if ]
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; Same triangle as test_vccnz_ifcvt_triangle, except that the float is loaded
; through an addrspace(2) pointer. The only expectation is that a
; v_cndmask_b32 is emitted.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_constant_load:
; GCN: v_cndmask_b32
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(float addrspace(1)* %out, float addrspace(2)* %in) #0 {
entry:
  %v = load float, float addrspace(2)* %in
  %cc = fcmp oeq float %v, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  %u = fadd float %v, %v
  br label %endif

endif:
  %r = phi float [ %v, %entry ], [ %u, %if ]
  store float %r, float addrspace(1)* %out
  ret void
}

; Due to broken cost heuristic, this is not if converted like
; test_vccnz_ifcvt_triangle_constant_load even though it should be.
; NOTE(review): the check below expects v_cndmask_b32, which looks like the
; converted form, so the comment above may be stale -- confirm against the
; current llc output before relying on either.

; Triangle in which the compared and selected value is the kernel argument %v
; itself instead of a value loaded through a pointer.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_argload:
; GCN: v_cndmask_b32
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_argload(float addrspace(1)* %out, float %v) #0 {
entry:
  %cc = fcmp oeq float %v, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  %u = fadd float %v, %v
  br label %endif

endif:
  %r = phi float [ %v, %entry ], [ %u, %if ]
  store float %r, float addrspace(1)* %out
  ret void
}

; Scalar branch and scalar inputs
; Integer compare on the kernel argument %cond, i32 value loaded through an
; addrspace(2) pointer, result consumed by inline asm through an "s" operand.
; The checks expect s_cmp_lg_u32 immediately followed by s_cselect_b32.
; GCN-LABEL: {{^}}test_scc1_sgpr_ifcvt_triangle:
; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x0
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]]
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[ADD]], [[VAL]]
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(2)* %in, i32 %cond) #0 {
entry:
  %v = load i32, i32 addrspace(2)* %in
  %cc = icmp eq i32 %cond, 1
  br i1 %cc, label %if, label %endif

if:
  %u = add i32 %v, %v
  br label %endif

endif:
  %r = phi i32 [ %v, %entry ], [ %u, %if ]
  call void asm sideeffect "; reg use $0", "s"(i32 %r) #0
  ret void
}

; FIXME: Should be able to use VALU compare and select
; Scalar branch but VGPR select operands
; Integer compare on the kernel argument %cond, but the selected floats come
; from an addrspace(1) load. The checks expect the s_cbranch_scc1 to remain,
; with v_add_f32 on the branched-over path.
; GCN-LABEL: {{^}}test_scc1_vgpr_ifcvt_triangle:
; GCN: s_cmp_lg_u32
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]

; GCN: v_add_f32_e32

; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
define amdgpu_kernel void @test_scc1_vgpr_ifcvt_triangle(float addrspace(1)* %out, float addrspace(1)* %in, i32 %cond) #0 {
entry:
  %v = load float, float addrspace(1)* %in
  %cc = icmp eq i32 %cond, 1
  br i1 %cc, label %if, label %endif

if:
  %u = fadd float %v, %v
  br label %endif

endif:
  %r = phi float [ %v, %entry ], [ %u, %if ]
  store float %r, float addrspace(1)* %out
  ret void
}

; 64-bit variant of the scalar triangle: the i64 add is expected as an
; s_add_u32/s_addc_u32 pair, and the select as one s_cselect_b64 placed
; directly after the s_cmp_lg_u32.
; GCN-LABEL: {{^}}test_scc1_sgpr_ifcvt_triangle64:
; GCN: s_add_u32
; GCN: s_addc_u32
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(i64 addrspace(2)* %in, i32 %cond) #0 {
entry:
  %v = load i64, i64 addrspace(2)* %in
  %cc = icmp eq i32 %cond, 1
  br i1 %cc, label %if, label %endif

if:
  %u = add i64 %v, %v
  br label %endif

endif:
  %r = phi i64 [ %v, %entry ], [ %u, %if ]
  call void asm sideeffect "; reg use $0", "s"(i64 %r) #0
  ret void
}

; TODO: Can do s_cselect_b64; s_cselect_b32
; <3 x i32> variant of the scalar triangle. The result is widened to
; <4 x i32> with a shufflevector (lane 3 comes from the undef operand) before
; being handed to the inline asm. The checks currently expect four s_add_i32
; and two back-to-back s_cselect_b64 after the compare.
; GCN-LABEL: {{^}}test_scc1_sgpr_ifcvt_triangle96:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(<3 x i32> addrspace(2)* %in, i32 %cond) #0 {
entry:
  %v = load <3 x i32>, <3 x i32> addrspace(2)* %in
  %cc = icmp eq i32 %cond, 1
  br i1 %cc, label %if, label %endif

if:
  %u = add <3 x i32> %v, %v
  br label %endif

endif:
  %r = phi <3 x i32> [ %v, %entry ], [ %u, %if ]
  %r.ext = shufflevector <3 x i32> %r, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r.ext) #0
  ret void
}

; <4 x i32> variant of the scalar triangle: four s_add_i32 are expected, then
; the compare followed immediately by two s_cselect_b64.
; GCN-LABEL: {{^}}test_scc1_sgpr_ifcvt_triangle128:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(<4 x i32> addrspace(2)* %in, i32 %cond) #0 {
entry:
  %v = load <4 x i32>, <4 x i32> addrspace(2)* %in
  %cc = icmp eq i32 %cond, 1
  br i1 %cc, label %if, label %endif

if:
  %u = add <4 x i32> %v, %v
  br label %endif

endif:
  %r = phi <4 x i32> [ %v, %entry ], [ %u, %if ]
  call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r) #0
  ret void
}

; Diamond with two empty arms whose phi merges the constants 0 and 1; the
; branch lists %else as its true target. The checks expect a compare against
; 0 and an s_cselect_b32 whose operands are the immediates 1, 0.
; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc_constant_select:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b32 s{{[0-9]+}}, 1, 0{{$}}
define amdgpu_kernel void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Same constant-select diamond, but the branch condition is undef. The checks
; expect the s_cselect_b32 directly after the argument load in the entry
; block, with no instruction in between.
; GCN-LABEL: {{^}}ifcvt_undef_scc:
; GCN: {{^}}; %bb.0:
; GCN-NEXT: s_load_dwordx2
; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 1, 0
define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  br i1 undef, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Triangle over an <8 x i32> value with a float compare on the kernel
; argument %cnd. The checks expect the s_cbranch_vccnz to remain, with
; v_add_i32 instructions on the branched-over path.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle256:
; GCN: v_cmp_neq_f32
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]

; GCN: v_add_i32
; GCN: v_add_i32

; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
define amdgpu_kernel void @test_vccnz_ifcvt_triangle256(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in, float %cnd) #0 {
entry:
  %v = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %cc = fcmp oeq float %cnd, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  %u = add <8 x i32> %v, %v
  br label %endif

endif:
  %r = phi <8 x i32> [ %v, %entry ], [ %u, %if ]
  store <8 x i32> %r, <8 x i32> addrspace(1)* %out
  ret void
}

; Triangle over a <16 x i32> value with a float compare on the kernel
; argument %cnd. The checks expect the s_cbranch_vccnz to remain, with
; v_add_i32 instructions on the branched-over path.
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle512:
; GCN: v_cmp_neq_f32
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]

; GCN: v_add_i32
; GCN: v_add_i32

; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
define amdgpu_kernel void @test_vccnz_ifcvt_triangle512(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in, float %cnd) #0 {
entry:
  %v = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %cc = fcmp oeq float %cnd, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  %u = add <16 x i32> %v, %v
  br label %endif

endif:
  %r = phi <16 x i32> [ %v, %entry ], [ %u, %if ]
  store <16 x i32> %r, <16 x i32> addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the kernels and inline asm calls above.
attributes #0 = { nounwind }