; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: call i64 @llvm.amdgcn.break(i64
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: %cmp1 = icmp sge i32 %tmp, %load
; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i1 @llvm.amdgcn.loop(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf(i64

; TODO: Can remove exec fixes in return block
; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[INITMASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]]
; GCN: v_cmp_lt_i32_e32 vcc, -1
; GCN: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; BB#2: ; %bb4
; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc,
; GCN: s_or_b64 [[MASK]], vcc, [[INITMASK]]

; GCN: [[FLOW]]:
; GCN: s_mov_b64 [[INITMASK]], [[MASK]]
; GCN: s_andn2_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; BB#4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_endpgm
; Loop whose latch condition comes from a volatile load each iteration;
; bb1 conditionally continues into bb4, and bb4 conditionally branches
; back to bb1 or exits to bb9 (two distinct loop exits to the same block).
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:                                              ; preds = %bb4, %bb1
  ret void
}

; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 undef, i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
; Loop already in structurized (Flow-block) form where the break condition
; reaching Flow from the bb1 edge is an undef i1 phi input.
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
; LDS global used below to form a constant-expression icmp that cannot be
; constant-folded at parse time.
@lds = addrspace(3) global i32 undef

; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
; Same Flow-block shape as @undef_phi_cond_break_loop, but the bb1 edge
; feeds a ConstantExpr icmp (comparing @lds against a fixed address) into
; the break-condition phi instead of undef.
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.break(i64 %phi.broken)
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
; Flow-block loop where the break-condition phi input on the bb1 edge is
; the constant true (i.e. skipping bb4 always breaks out of the loop).
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %0, %bb4 ], [ %phi.broken, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
; Flow-block loop where the break-condition phi input on the bb1 edge is
; the constant false (i.e. skipping bb4 never breaks out of the loop).
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
; Same as @true_phi_cond_break_loop but with the Flow terminator's
; successors swapped, so the true edge of %tmp3 continues the loop (bb1)
; and the false edge exits (bb9).
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }