; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Add extra verifier runs. There were some cases where invalid IR
; was produced but happened to be fixed by the later passes.

; Make sure divergent control flow with multiple exits from a region
; is properly handled. UnifyFunctionExitNodes should be run before
; StructurizeCFG.

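; Most of the tests below are variants of this CFG shape, where both
; leaf blocks branch to both exit blocks (a rough sketch, not checked
; for directly):
;
;   entry      -> LeafBlock, LeafBlock1
;   LeafBlock  -> exit0, exit1
;   LeafBlock1 -> exit0, exit1
;
; UnifyFunctionExitNodes first merges the returns into a single
; UnifiedReturnBlock, and StructurizeCFG then introduces the Flow
; blocks that the checks below look for.
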
; IR-LABEL: @multi_divergent_region_exit_ret_ret(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
; IR: %2 = extractvalue { i1, i64 } %1, 0
; IR: %3 = extractvalue { i1, i64 } %1, 1
; IR: br i1 %2, label %LeafBlock1, label %Flow

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
; IR: %7 = extractvalue { i1, i64 } %6, 0
; IR: %8 = extractvalue { i1, i64 } %6, 1
; IR: br i1 %7, label %LeafBlock, label %Flow1

; IR: LeafBlock:
; IR: br label %Flow1

; IR: LeafBlock1:
; IR: br label %Flow{{$}}

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: %13 = extractvalue { i1, i64 } %12, 0
; IR: %14 = extractvalue { i1, i64 } %12, 1
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock

; IR: Flow1:
; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2

; IR: UnifiedReturnBlock:
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void


; GCN-LABEL: {{^}}multi_divergent_region_exit_ret_ret:
; GCN: v_cmp_lt_i32_e32 vcc, 1
; GCN: s_and_saveexec_b64
; GCN: s_xor_b64


; FIXME: Why is this compare essentially repeated?
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]]
; GCN-NEXT: v_cmp_ne_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, [[REG]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1

; GCN: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec
; GCN: v_cmp_ne_u32_e32 vcc, 0

; GCN: ; %exit1
; GCN: ds_write_b32

; GCN: %Flow2
; GCN-NEXT: s_or_b64 exec, exec
; GCN: v_cmp_ne_u32_e32 vcc, 0
; GCN-NEXT: s_and_saveexec_b64
; GCN-NEXT: s_xor_b64

; GCN: ; %exit0
; GCN: buffer_store_dword

; GCN: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @multi_divergent_region_exit_ret_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)

; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock


; IR: UnifiedUnreachableBlock:
; IR-NEXT: unreachable


; FIXME: Probably should insert an s_endpgm anyway.
; GCN-LABEL: {{^}}multi_divergent_region_exit_unreachable_unreachable:
; GCN: ; %UnifiedUnreachableBlock
; GCN-NEXT: .Lfunc_end
define amdgpu_kernel void @multi_divergent_region_exit_unreachable_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  unreachable
}

; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret(
; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2
; IR: llvm.amdgcn.if
; IR: br i1

; IR: {{^}}Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
; IR: br i1 %7, label %LeafBlock, label %Flow1

; IR: {{^}}LeafBlock:
; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1
; IR: %9 = xor i1 %divergent.cond1, true
; IR: br label %Flow1

; IR: LeafBlock1:
; IR: %uniform.cond0 = icmp eq i32 %arg3, 2
; IR: %10 = xor i1 %uniform.cond0, true
; IR: br label %Flow

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock

; IR: {{^}}Flow1:
; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2

; IR: UnifiedReturnBlock:
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void
define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %divergent.cond0 = icmp slt i32 %tmp16, 2
  br i1 %divergent.cond0, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %divergent.cond1 = icmp eq i32 %tmp16, 1
  br i1 %divergent.cond1, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %uniform.cond0 = icmp eq i32 %arg3, 2
  br i1 %uniform.cond0, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
; IR: br i1 %2, label %LeafBlock1, label %Flow

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)

define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %arg3, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
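; Unlike the kernel variants above, this function returns a value, so
; unifying the exits also requires a phi (%UnifiedRetVal) that merges
; the two return values in the unified return block.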
; IR: Flow2:
; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %20)

; IR: UnifiedReturnBlock:
; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %15)
; IR: ret float %UnifiedRetVal
define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
entry:
  %Pivot = icmp slt i32 %vgpr, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %vgpr, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %vgpr, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 9, i32 addrspace(1)* undef
  ret float 1.0

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 17, i32 addrspace(3)* undef
  ret float 2.0
}

; IR-LABEL: @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(

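; The outer branch compares an SGPR argument, so it is uniform and is
; expected (per the checks below) to become a scalar compare and branch
; (s_cmp, s_cbranch_scc0) around the divergent region, instead of an
; exec-mask save/restore.
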
; GCN-LABEL: {{^}}uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value:
; GCN: s_cmp_gt_i32 s0, 1
; GCN: s_cbranch_scc0 [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: v_cmp_ne_u32_e32 vcc, 7, v0

; GCN: {{^}}[[FLOW]]:
; GCN: s_cbranch_vccnz [[FLOW1:BB[0-9]+]]

; GCN: v_mov_b32_e32 v0, 2.0
; GCN: s_or_b64 exec, exec
; GCN: s_and_b64 exec, exec
; GCN: v_mov_b32_e32 v0, 1.0

; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ; return

define amdgpu_ps float @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(i32 inreg %sgpr, i32 %vgpr) #0 {
entry:
  %uniform.cond = icmp slt i32 %sgpr, 2
  br i1 %uniform.cond, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %divergent.cond0 = icmp eq i32 %vgpr, 3
  br i1 %divergent.cond0, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %divergent.cond1 = icmp eq i32 %vgpr, 7
  br i1 %divergent.cond1, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 9, i32 addrspace(1)* undef
  ret float 1.0

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 17, i32 addrspace(3)* undef
  ret float 2.0
}

; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
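; Same CFG as @multi_divergent_region_exit_ret_ret, except exit1 ends
; in unreachable instead of ret. The unreachable exit is still routed
; through Flow2, with @llvm.amdgcn.unreachable standing in for its
; terminator.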
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock

; IR: Flow1:
; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR-NEXT: store volatile i32 9, i32 addrspace(1)* undef
; IR-NEXT: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %Flow2

; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; The non-uniformity of the branch to the exiting blocks requires
; looking at transitive predecessors.

; IR-LABEL: @indirect_multi_divergent_region_exit_ret_unreachable(

; IR: exit0: ; preds = %Flow2
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock


; IR: indirect.exit1:
; IR: %load = load volatile i32, i32 addrspace(1)* undef
; IR: store volatile i32 %load, i32 addrspace(1)* undef
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %Flow2

; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %indirect.exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %indirect.exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

indirect.exit1:                                   ; preds = %LeafBlock, %LeafBlock1
  %load = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 %load, i32 addrspace(1)* undef
  br label %exit1

exit1:                                            ; preds = %indirect.exit1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; IR-LABEL: @multi_divergent_region_exit_ret_switch(
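; Here the divergent region is entered through a switch rather than a
; branch, and the exits mix ret (exit0) and unreachable (exit1), so the
; default and case destinations must all be funneled into the unified
; exit blocks.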
define amdgpu_kernel void @multi_divergent_region_exit_ret_switch(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  switch i32 %tmp16, label %exit1
       [ i32 1, label %LeafBlock
         i32 2, label %LeafBlock1
         i32 3, label %exit0 ]

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %entry, %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

exit1:                                            ; preds = %entry, %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; IR-LABEL: @divergent_multi_ret_nest_in_uniform_triangle(
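; A divergent two-return region nested inside a uniform triangle; the
; two divergent returns and the uniform one all need to be merged into
; a single exit.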
define amdgpu_kernel void @divergent_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
entry:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret

divergent.multi.exit.region:                      ; preds = %entry
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %divergent.ret0, label %divergent.ret1

divergent.ret0:                                   ; preds = %divergent.multi.exit.region
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

divergent.ret1:                                   ; preds = %divergent.multi.exit.region
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

uniform.ret:                                      ; preds = %entry
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @divergent_complex_multi_ret_nest_in_uniform_triangle(
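; As above, but the divergent region contains additional nested
; divergent control flow (an if/then/endif) before reaching its
; returns.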
define amdgpu_kernel void @divergent_complex_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
entry:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret

divergent.multi.exit.region:                      ; preds = %entry
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %divergent.if, label %divergent.ret1

divergent.if:                                     ; preds = %divergent.multi.exit.region
  %vgpr0 = load volatile float, float addrspace(1)* undef
  %divergent.cond1 = fcmp ogt float %vgpr0, 1.0
  br i1 %divergent.cond1, label %divergent.then, label %divergent.endif

divergent.then:                                   ; preds = %divergent.if
  %vgpr1 = load volatile float, float addrspace(1)* undef
  %divergent.cond2 = fcmp olt float %vgpr1, 4.0
  store volatile i32 33, i32 addrspace(1)* undef
  br i1 %divergent.cond2, label %divergent.ret0, label %divergent.endif

divergent.endif:                                  ; preds = %divergent.then, %divergent.if
  store volatile i32 38, i32 addrspace(1)* undef
  br label %divergent.ret0

divergent.ret0:                                   ; preds = %divergent.endif, %divergent.then
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

divergent.ret1:                                   ; preds = %divergent.multi.exit.region
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

uniform.ret:                                      ; preds = %entry
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @uniform_complex_multi_ret_nest_in_divergent_triangle(
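; The mirror image of the previous test: a uniform multi-exit region
; nested inside a divergent triangle.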
; IR: Flow1: ; preds = %uniform.ret1, %uniform.multi.exit.region
; IR: %8 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
; IR: br i1 %8, label %uniform.if, label %Flow2

; IR: Flow: ; preds = %uniform.then, %uniform.if
; IR: %11 = phi i1 [ %10, %uniform.then ], [ %9, %uniform.if ]
; IR: br i1 %11, label %uniform.endif, label %uniform.ret0

; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6)
; IR-NEXT: ret void
define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 {
entry:
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %uniform.multi.exit.region, label %divergent.ret

uniform.multi.exit.region:                        ; preds = %entry
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %uniform.if, label %uniform.ret1

uniform.if:                                       ; preds = %uniform.multi.exit.region
  %sgpr0 = load volatile i32, i32 addrspace(2)* undef
  %uniform.cond1 = icmp slt i32 %sgpr0, 1
  br i1 %uniform.cond1, label %uniform.then, label %uniform.endif

uniform.then:                                     ; preds = %uniform.if
  %sgpr1 = load volatile i32, i32 addrspace(2)* undef
  %uniform.cond2 = icmp sge i32 %sgpr1, 4
  store volatile i32 33, i32 addrspace(1)* undef
  br i1 %uniform.cond2, label %uniform.ret0, label %uniform.endif

uniform.endif:                                    ; preds = %uniform.then, %uniform.if
  store volatile i32 38, i32 addrspace(1)* undef
  br label %uniform.ret0

uniform.ret0:                                     ; preds = %uniform.endif, %uniform.then
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

uniform.ret1:                                     ; preds = %uniform.multi.exit.region
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

divergent.ret:                                    ; preds = %entry
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @multi_divergent_unreachable_exit(
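; Both destinations of the divergent switch are unreachable, so they
; are merged into a UnifiedUnreachableBlock, which in turn has to
; branch on to the UnifiedReturnBlock created for the returning path.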
; IR: UnifiedUnreachableBlock:
; IR-NEXT: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %UnifiedReturnBlock

; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64
; IR-NEXT: ret void
define amdgpu_kernel void @multi_divergent_unreachable_exit() #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  switch i32 %tmp, label %bb3 [
    i32 2, label %bb1
    i32 0, label %bb2
  ]

bb1:                                              ; preds = %bb
  unreachable

bb2:                                              ; preds = %bb
  unreachable

bb3:                                              ; preds = %bb
  switch i32 undef, label %bb5 [
    i32 2, label %bb4
  ]

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3
  unreachable
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }