# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking -o - %s | FileCheck %s

--- |
  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

  define amdgpu_kernel void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_or_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    store i32 %id, i32 addrspace(1)* undef
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_unknown_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  ; Function Attrs: nounwind readnone
  declare i32 @llvm.amdgcn.workitem.id.x() #1

  declare { i1, i64 } @llvm.amdgcn.if(i1)

  declare void @llvm.amdgcn.end.cf(i64)

  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }

...
---
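# A copy of exec, an S_AND_B64 reading that copy, and a terminator copy of the
# result back to exec should fold into a single S_AND_SAVEEXEC_B64, with the
# intervening S_XOR_B64 rewritten to read exec instead of the saved copy.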
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_and_saveexec_xor
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
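# Same pattern without the xor: the whole copy/and/set-exec sequence should
# collapse into one S_AND_SAVEEXEC_B64.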
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_and_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
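# An S_OR_B64 of the saved exec copy folds to S_OR_SAVEEXEC_B64 the same way.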
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_or_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_OR_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
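# The VALU store between the S_AND_B64 and the write to exec blocks the fold;
# only the S_MOV_B64_term is expected to be lowered to a plain COPY.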
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name: optimize_if_and_saveexec_xor_valu_middle
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
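# The S_XOR_B64 reads undef %sgpr2_sgpr3 rather than the S_AND_B64 result, so
# no saveexec instruction should be formed; the terminator becomes a COPY.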
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}}
# CHECK: %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY %sgpr0_sgpr1
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec
name: optimize_if_and_saveexec_xor_wrong_reg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr6 = S_MOV_B32 -1
    %sgpr7 = S_MOV_B32 61440
    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term %sgpr0_sgpr1
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1, %sgpr4_sgpr5_sgpr6_sgpr7

    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1, %sgpr4_sgpr5_sgpr6_sgpr7

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
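# %sgpr2_sgpr3 is modified by the S_OR_B64 after the S_AND_B64 and before the
# copy to exec, so folding into S_AND_SAVEEXEC_B64 would be unsafe.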
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_modify_copy_to_exec{{$}}
# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec

name: optimize_if_and_saveexec_xor_modify_copy_to_exec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr0 = S_MOV_B32 0
    %sgpr1 = S_MOV_B32 1
    %sgpr2 = S_MOV_B32 -1
    %sgpr3 = S_MOV_B32 61440
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
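# The mask copied to exec is also read in bb.1.if (by S_SLEEP), so it cannot
# be folded away; note the lowered COPY to exec is not marked killed.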
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}}
# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name: optimize_if_and_saveexec_xor_live_out_setexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1, %sgpr2_sgpr3

    S_SLEEP 0, implicit %sgpr2_sgpr3
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
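# S_LSHR_B64 has no saveexec form, so only the terminator copy to exec is
# expected to be lowered.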
# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = COPY %exec
# CHECK: %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec

name: optimize_if_unknown_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
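# An S_ANDN2_B64 whose first operand is the saved exec copy should fold into
# S_ANDN2_SAVEEXEC_B64.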
# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_andn2_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_ANDN2_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
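# With the operands swapped, S_ANDN2_B64 is not commutable, so the fold must
# not happen and the terminator is just lowered to a COPY.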
# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
# CHECK: %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec
name: optimize_if_andn2_saveexec_no_commute
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...