Nicolai Haehnle | 6cf306d | 2018-02-23 10:45:56 +0000 | [diff] [blame] | 1 | # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s |
| 2 | |
| 3 | # Check that SILoadStoreOptimizer honors physreg defs/uses between moved |
| 4 | # instructions. |
| 5 | # |
| 6 | # The following IR snippet would usually be optimized by the peephole optimizer. |
| 7 | # However, an equivalent situation can occur with buffer instructions as well. |
| 8 | |
| 9 | # CHECK-LABEL: name: scc_def_and_use_no_dependency |
| 10 | # CHECK: S_ADD_U32 |
| 11 | # CHECK: S_ADDC_U32 |
| 12 | # CHECK: DS_READ2_B32 |
| 13 | --- | |
| 14 | define amdgpu_kernel void @scc_def_and_use_no_dependency(i32 addrspace(3)* %ptr.0) nounwind { |
| 15 | %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1 |
| 16 | %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16 |
| 17 | ret void |
| 18 | } |
| 19 | |
| 20 | define amdgpu_kernel void @scc_def_and_use_dependency(i32 addrspace(3)* %ptr.0) nounwind { |
| 21 | %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1 |
| 22 | %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16 |
| 23 | ret void |
| 24 | } |
| 25 | ... |
| 26 | --- |
| 27 | name: scc_def_and_use_no_dependency |
| 28 | alignment: 0 |
| 29 | exposesReturnsTwice: false |
| 30 | legalized: false |
| 31 | regBankSelected: false |
| 32 | selected: false |
| 33 | tracksRegLiveness: false |
| 34 | liveins: |
| 35 | - { reg: '$vgpr0' } |
| 36 | - { reg: '$sgpr0' } |
| 37 | frameInfo: |
| 38 | isFrameAddressTaken: false |
| 39 | isReturnAddressTaken: false |
| 40 | hasStackMap: false |
| 41 | hasPatchPoint: false |
| 42 | stackSize: 0 |
| 43 | offsetAdjustment: 0 |
| 44 | maxAlignment: 0 |
| 45 | adjustsStack: false |
| 46 | hasCalls: false |
| 47 | maxCallFrameSize: 0 |
| 48 | hasOpaqueSPAdjustment: false |
| 49 | hasVAStart: false |
| 50 | hasMustTailInVarArgFunc: false |
| 51 | body: | |
| 52 | bb.0: |
| 53 | liveins: $vgpr0, $sgpr0 |
| 54 | |
| 55 | %1:vgpr_32 = COPY $vgpr0 |
| 56 | %10:sgpr_32 = COPY $sgpr0 |
| 57 | |
| 58 | $m0 = S_MOV_B32 -1 |
| 59 | %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0) |
| 60 | |
| 61 | %11:sgpr_32 = S_ADD_U32 %10, 4, implicit-def $scc |
| 62 | %12:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc |
| 63 | |
| 64 | %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64) |
| 65 | S_ENDPGM |
| 66 | |
| 67 | ... |
| 68 | |
| 69 | # CHECK-LABEL: name: scc_def_and_use_dependency |
| 70 | # CHECK: DS_READ2_B32 |
| 71 | # CHECK: S_ADD_U32 |
| 72 | # CHECK: S_ADDC_U32 |
| 73 | --- |
| 74 | name: scc_def_and_use_dependency |
| 75 | alignment: 0 |
| 76 | exposesReturnsTwice: false |
| 77 | legalized: false |
| 78 | regBankSelected: false |
| 79 | selected: false |
| 80 | tracksRegLiveness: false |
| 81 | liveins: |
| 82 | - { reg: '$vgpr0' } |
| 83 | - { reg: '$sgpr0' } |
| 84 | frameInfo: |
| 85 | isFrameAddressTaken: false |
| 86 | isReturnAddressTaken: false |
| 87 | hasStackMap: false |
| 88 | hasPatchPoint: false |
| 89 | stackSize: 0 |
| 90 | offsetAdjustment: 0 |
| 91 | maxAlignment: 0 |
| 92 | adjustsStack: false |
| 93 | hasCalls: false |
| 94 | maxCallFrameSize: 0 |
| 95 | hasOpaqueSPAdjustment: false |
| 96 | hasVAStart: false |
| 97 | hasMustTailInVarArgFunc: false |
| 98 | body: | |
| 99 | bb.0: |
| 100 | liveins: $vgpr0, $sgpr0 |
| 101 | |
| 102 | %1:vgpr_32 = COPY $vgpr0 |
| 103 | %10:sgpr_32 = COPY $sgpr0 |
| 104 | |
| 105 | $m0 = S_MOV_B32 -1 |
| 106 | %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0) |
| 107 | %20:sgpr_32 = V_READFIRSTLANE_B32 %2, implicit $exec |
| 108 | |
| 109 | %21:sgpr_32 = S_ADD_U32 %20, 4, implicit-def $scc |
| 110 | ; The S_ADDC_U32 depends on the first DS_READ_B32 only via SCC |
| 111 | %11:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc |
| 112 | |
| 113 | %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64) |
| 114 | S_ENDPGM |
| 115 | |
| 116 | ... |