# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking -o - %s | FileCheck %s

--- |
  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

  define amdgpu_kernel void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_or_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }


  define amdgpu_kernel void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    store i32 %id, i32 addrspace(1)* undef
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_unknown_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  ; Function Attrs: nounwind readnone
  declare i32 @llvm.amdgcn.workitem.id.x() #1

  declare { i1, i64 } @llvm.amdgcn.if(i1)

  declare void @llvm.amdgcn.end.cf(i64)


  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }

...
---
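# The s_and_b64 of a copy of exec, the s_xor_b64 that undoes it, and the
# terminator copy back to exec should fold into s_and_saveexec_b64, with the
# xor rewritten to operate on exec.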
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_and_saveexec_xor
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
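# The same pattern without the xor: the s_and_b64 of the exec copy plus the
# terminator copy to exec should fold into s_and_saveexec_b64.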
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_and_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
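# An s_or_b64 of the exec copy should fold into s_or_saveexec_b64.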
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_or_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_OR_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
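# An intervening VALU instruction (the buffer store) between the mask
# computation and the copy to exec prevents the fold; only the terminator
# copy is lowered to a regular COPY.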
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name: optimize_if_and_saveexec_xor_valu_middle
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
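# The s_xor_b64 reads undef %sgpr2_sgpr3 rather than the s_and_b64 result, so
# no saveexec can be formed; only the terminator copy to exec is lowered to a
# regular COPY.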
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}}
# CHECK: %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY %sgpr0_sgpr1
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec
name: optimize_if_and_saveexec_xor_wrong_reg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr6 = S_MOV_B32 -1
    %sgpr7 = S_MOV_B32 61440
    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term %sgpr0_sgpr1
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1, %sgpr4_sgpr5_sgpr6_sgpr7

    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1, %sgpr4_sgpr5_sgpr6_sgpr7

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
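# The register holding the new exec value is modified (s_or_b64 with 1)
# between the s_and_b64 and the terminator copy, so the fold is not possible.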
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_modify_copy_to_exec{{$}}
# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec

name: optimize_if_and_saveexec_xor_modify_copy_to_exec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr0 = S_MOV_B32 0
    %sgpr1 = S_MOV_B32 1
    %sgpr2 = S_MOV_B32 -1
    %sgpr3 = S_MOV_B32 61440
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
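# The new exec value in %sgpr2_sgpr3 is still used in %bb.1.if (by S_SLEEP),
# so it cannot be killed by a saveexec; the terminator copy is lowered to a
# COPY without a kill flag.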
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}}
# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name: optimize_if_and_saveexec_xor_live_out_setexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1, %sgpr2_sgpr3

    S_SLEEP 0, implicit %sgpr2_sgpr3
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
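# s_lshr_b64 has no saveexec form, so only the terminator copy to exec is
# lowered to a regular COPY.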
# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = COPY %exec
# CHECK: %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec

name: optimize_if_unknown_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
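# An s_andn2_b64 with the exec copy as its first operand should fold into
# s_andn2_saveexec_b64.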
# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_andn2_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_ANDN2_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
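# s_andn2_b64 is not commutative, so with the exec copy as the second operand
# no saveexec can be formed; only the terminator copy is lowered to a COPY.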
# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
# CHECK: %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec
name: optimize_if_andn2_saveexec_no_commute
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...