; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

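; These tests cover how SIAnnotateControlFlow annotates divergent loop
; exits: llvm.amdgcn.break/if.break accumulate a mask of lanes that have
; left the loop, llvm.amdgcn.loop tests whether any lanes remain active,
; and llvm.amdgcn.end.cf re-enables the broken lanes after the loop.
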
; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: call i64 @llvm.amdgcn.break(i64
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: xor i1 %cmp1
; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i1 @llvm.amdgcn.loop(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf(i64

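; In the generated machine code, the break mask is accumulated with
; s_or_b64, broken lanes are cleared from exec with s_andn2_b64, and the
; loop repeats while exec is still nonzero (s_cbranch_execnz).
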
; TODO: Can remove exec fixes in return block
; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[INITMASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]]
; GCN: v_cmp_lt_i32_e32 vcc, -1
; GCN: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; %bb.2: ; %bb4
; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc,
; GCN: s_or_b64 [[MASK]], vcc, [[INITMASK]]

; GCN: [[FLOW]]:
; GCN: s_mov_b64 [[INITMASK]], [[MASK]]
; GCN: s_andn2_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

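; Here the break condition in the flow block is undef; the checks expect
; it to be passed through as the i1 operand of llvm.amdgcn.if.break.
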
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 undef, i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

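; Here the break condition is a constantexpr icmp; the checks expect it to
; be passed directly as the i1 operand of llvm.amdgcn.if.break.
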
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

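; Here the break condition from the loop header is constant true, so the
; checks expect the unconditional llvm.amdgcn.break in bb1 instead of an
; if.break on a condition.
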
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.break(i64 %phi.broken)
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

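; Here the break condition from the loop header is constant false, so no
; break intrinsic is needed in bb1 and %phi.broken flows through Flow
; unchanged on that path.
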
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %0, %bb4 ], [ %phi.broken, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branches in the flow block so that the true phi
; value is the loop continue condition rather than the break condition.

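; Since the true phi value now selects the backedge, the checks expect the
; condition to be inverted with an xor before feeding llvm.amdgcn.if.break.
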
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }