; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Add extra verifier runs. There were some cases where invalid IR
; was produced but happened to be fixed by the later passes.

; Make sure divergent control flow with multiple exits from a region
; is properly handled. UnifyFunctionExitNodes should be run before
; StructurizeCFG.
; IR-LABEL: @multi_divergent_region_exit_ret_ret(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
; IR: %2 = extractvalue { i1, i64 } %1, 0
; IR: %3 = extractvalue { i1, i64 } %1, 1
; IR: br i1 %2, label %LeafBlock1, label %Flow

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
; IR: %7 = extractvalue { i1, i64 } %6, 0
; IR: %8 = extractvalue { i1, i64 } %6, 1
; IR: br i1 %7, label %LeafBlock, label %Flow1

; IR: LeafBlock:
; IR: br label %Flow1

; IR: LeafBlock1:
; IR: br label %Flow{{$}}

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: %13 = extractvalue { i1, i64 } %12, 0
; IR: %14 = extractvalue { i1, i64 } %12, 1
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock

; IR: Flow1:
; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2

; IR: UnifiedReturnBlock:
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void


; GCN-LABEL: {{^}}multi_divergent_region_exit_ret_ret:
; GCN: v_cmp_lt_i32_e32 vcc, 1
; GCN: s_and_saveexec_b64
; GCN: s_xor_b64


; FIXME: Why is this compare essentially repeated?
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc
; GCN: v_cmp_ne_u32_e32 vcc, 1, [[REG]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc

; GCN: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec
; GCN: v_cmp_ne_u32_e32 vcc, 0

; GCN: ; %exit1
; GCN: ds_write_b32

; GCN: %Flow2
; GCN-NEXT: s_or_b64 exec, exec
; GCN: v_cmp_ne_u32_e32 vcc, 0
; GCN-NEXT: s_and_saveexec_b64

; GCN: ; %exit0
; GCN: buffer_store_dword

; GCN: ; %UnifiedReturnBlock
; GCN-NEXT: s_endpgm
; Two divergent leaf blocks, each branching to two distinct ret-terminated
; exits; checks the unify-exits + structurize pipeline handles both rets.
define amdgpu_kernel void @multi_divergent_region_exit_ret_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)

; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock


; IR: UnifiedUnreachableBlock:
; IR-NEXT: unreachable


; FIXME: Probably should insert an s_endpgm anyway.
; GCN-LABEL: {{^}}multi_divergent_region_exit_unreachable_unreachable:
; GCN: ; %UnifiedUnreachableBlock
; GCN-NEXT: .Lfunc_end
; Same divergent region shape as above, but both exits end in unreachable,
; exercising UnifiedUnreachableBlock creation.
define amdgpu_kernel void @multi_divergent_region_exit_unreachable_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  unreachable
}

; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret(
; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2
; IR: llvm.amdgcn.if
; IR: br i1

; IR: {{^}}Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
; IR: br i1 %7, label %LeafBlock, label %Flow1

; IR: {{^}}LeafBlock:
; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1
; IR: %9 = xor i1 %divergent.cond1, true
; IR: br label %Flow1

; IR: LeafBlock1:
; IR: %uniform.cond0 = icmp eq i32 %arg3, 2
; IR: %10 = xor i1 %uniform.cond0, true
; IR: br label %Flow

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock

; IR: {{^}}Flow1:
; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2

; IR: UnifiedReturnBlock:
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void
; One exit is reached by a divergent branch, the other by a uniform branch
; (on the %arg3 SGPR argument).
define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %divergent.cond0 = icmp slt i32 %tmp16, 2
  br i1 %divergent.cond0, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %divergent.cond1 = icmp eq i32 %tmp16, 1
  br i1 %divergent.cond1, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %uniform.cond0 = icmp eq i32 %arg3, 2
  br i1 %uniform.cond0, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
; IR: br i1 %2, label %LeafBlock1, label %Flow

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; Mirror of the previous test: the uniform condition (on %arg3) is in
; LeafBlock and the divergent one in LeafBlock1.
define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %arg3, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
; IR: Flow2:
; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %20)

; IR: UnifiedReturnBlock:
; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %15)
; IR: ret float %UnifiedRetVal
; Non-void variant: the unified exit must merge the two returned floats
; through a %UnifiedRetVal phi.
define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
entry:
  %Pivot = icmp slt i32 %vgpr, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %vgpr, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %vgpr, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 9, i32 addrspace(1)* undef
  ret float 1.0

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 17, i32 addrspace(3)* undef
  ret float 2.0
}

; IR-LABEL: @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(

; GCN-LABEL: {{^}}uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value:
; GCN: s_cmp_gt_i32 s0, 1
; GCN: s_cbranch_scc0 [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: v_cmp_ne_u32_e32 vcc, 7, v0

; GCN: {{^}}[[FLOW]]:
; GCN: s_cbranch_vccnz [[FLOW1:BB[0-9]+]]

; GCN: v_mov_b32_e32 v0, 2.0
; GCN: s_or_b64 exec, exec
; GCN: s_and_b64 exec, exec
; GCN: v_mov_b32_e32 v0, 1.0

; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ; return
; A uniform entry branch (on the inreg SGPR) guarding a region whose two
; exits are reached by divergent branches on the VGPR argument.
define amdgpu_ps float @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(i32 inreg %sgpr, i32 %vgpr) #0 {
entry:
  %uniform.cond = icmp slt i32 %sgpr, 2
  br i1 %uniform.cond, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %divergent.cond0 = icmp eq i32 %vgpr, 3
  br i1 %divergent.cond0, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %divergent.cond1 = icmp eq i32 %vgpr, 7
  br i1 %divergent.cond1, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 9, i32 addrspace(1)* undef
  ret float 1.0

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 17, i32 addrspace(3)* undef
  ret float 2.0
}

; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock

; IR: Flow1:
; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR-NEXT: store volatile i32 9, i32 addrspace(1)* undef
; IR-NEXT: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %Flow2

; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
; Mixed exit kinds: one ret and one unreachable from the divergent region.
define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; The non-uniformity of the branch to the exiting blocks requires
; looking at transitive predecessors.

; IR-LABEL: @indirect_multi_divergent_region_exit_ret_unreachable(

; IR: exit0: ; preds = %Flow2
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock


; IR: indirect.exit1:
; IR: %load = load volatile i32, i32 addrspace(1)* undef
; IR: store volatile i32 %load, i32 addrspace(1)* undef
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %Flow2

; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
; The unreachable exit is reached through an intermediate block
; (indirect.exit1), so divergence must be traced through transitive
; predecessors.
define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %indirect.exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %indirect.exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

indirect.exit1:
  %load = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 %load, i32 addrspace(1)* undef
  br label %exit1

exit1:                                            ; preds = %indirect.exit1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; IR-LABEL: @multi_divergent_region_exit_ret_switch(
; The region is entered through a divergent switch rather than a
; conditional branch.
define amdgpu_kernel void @multi_divergent_region_exit_ret_switch(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  switch i32 %tmp16, label %exit1 [
    i32 1, label %LeafBlock
    i32 2, label %LeafBlock1
    i32 3, label %exit0
  ]

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %entry, %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

exit1:                                            ; preds = %entry, %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; IR-LABEL: @divergent_multi_ret_nest_in_uniform_triangle(
; A divergent multi-exit region nested inside a uniform triangle.
define amdgpu_kernel void @divergent_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
entry:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret

divergent.multi.exit.region:
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %divergent.ret0, label %divergent.ret1

divergent.ret0:
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

divergent.ret1:
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

uniform.ret:
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @divergent_complex_multi_ret_nest_in_uniform_triangle(
; As above, but the divergent region itself contains nested divergent
; control flow (if/then/endif) before the two returns.
define amdgpu_kernel void @divergent_complex_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
entry:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret

divergent.multi.exit.region:
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %divergent.if, label %divergent.ret1

divergent.if:
  %vgpr0 = load volatile float, float addrspace(1)* undef
  %divergent.cond1 = fcmp ogt float %vgpr0, 1.0
  br i1 %divergent.cond1, label %divergent.then, label %divergent.endif

divergent.then:
  %vgpr1 = load volatile float, float addrspace(1)* undef
  %divergent.cond2 = fcmp olt float %vgpr1, 4.0
  store volatile i32 33, i32 addrspace(1)* undef
  br i1 %divergent.cond2, label %divergent.ret0, label %divergent.endif

divergent.endif:
  store volatile i32 38, i32 addrspace(1)* undef
  br label %divergent.ret0

divergent.ret0:
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

divergent.ret1:
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

uniform.ret:
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @uniform_complex_multi_ret_nest_in_divergent_triangle(
; IR: Flow1: ; preds = %uniform.ret1, %uniform.multi.exit.region
; IR: %8 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
; IR: br i1 %8, label %uniform.if, label %Flow2

; IR: Flow: ; preds = %uniform.then, %uniform.if
; IR: %11 = phi i1 [ %10, %uniform.then ], [ %9, %uniform.if ]
; IR: br i1 %11, label %uniform.endif, label %uniform.ret0

; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6)
; IR-NEXT: ret void
; Inverse nesting: a uniform multi-exit region (SGPR conditions) nested
; inside a divergent triangle on workitem id.
define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 {
entry:
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %uniform.multi.exit.region, label %divergent.ret

uniform.multi.exit.region:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %uniform.if, label %uniform.ret1

uniform.if:
  %sgpr0 = load volatile i32, i32 addrspace(2)* undef
  %uniform.cond1 = icmp slt i32 %sgpr0, 1
  br i1 %uniform.cond1, label %uniform.then, label %uniform.endif

uniform.then:
  %sgpr1 = load volatile i32, i32 addrspace(2)* undef
  %uniform.cond2 = icmp sge i32 %sgpr1, 4
  store volatile i32 33, i32 addrspace(1)* undef
  br i1 %uniform.cond2, label %uniform.ret0, label %uniform.endif

uniform.endif:
  store volatile i32 38, i32 addrspace(1)* undef
  br label %uniform.ret0

uniform.ret0:
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

uniform.ret1:
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

divergent.ret:
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @multi_divergent_unreachable_exit(
; IR: UnifiedUnreachableBlock:
; IR-NEXT: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %UnifiedReturnBlock

; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64
; IR-NEXT: ret void
; Multiple divergent unreachable exits plus a ret: the unreachable exits
; must be unified and then funneled into the unified return.
define amdgpu_kernel void @multi_divergent_unreachable_exit() #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  switch i32 %tmp, label %bb3 [
    i32 2, label %bb1
    i32 0, label %bb2
  ]

bb1:                                              ; preds = %bb
  unreachable

bb2:                                              ; preds = %bb
  unreachable

bb3:                                              ; preds = %bb
  switch i32 undef, label %bb5 [
    i32 2, label %bb4
  ]

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3
  unreachable
}
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }