blob: f7002cc12a67a8e85d12480c18d925a0f90720a0 [file] [log] [blame]
Nicolai Haehnle6cf306d2018-02-23 10:45:56 +00001# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s
2
3# Check that SILoadStoreOptimizer honors physical-register defs and uses (here
4# $scc) among the instructions it moves.
5#
6# The following IR snippet would usually be optimized by the peephole optimizer.
7# However, an equivalent situation can occur with buffer instructions as well.
8
9# CHECK-LABEL: name: scc_def_and_use_no_dependency
10# CHECK: S_ADD_U32
11# CHECK: S_ADDC_U32
12# CHECK: DS_READ2_B32
13--- |
 ; Stub IR module. Both functions just compute two addresses and return; the
 ; MIR bodies in this file name these values in their memory operands
 ; (%ir.ptr.0 and %ir.ptr.64).
14 define amdgpu_kernel void @scc_def_and_use_no_dependency(i32 addrspace(3)* %ptr.0) nounwind {
 ; i32 element 1 of %ptr.0; no MIR memory operand in this file refers to it.
15 %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
 ; i32 element 16 of %ptr.0; named by the DS_READ_B32 at offset 64.
16 %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
17 ret void
18 }
19
 ; Same signature and body as the function above, under the second test's name.
20 define amdgpu_kernel void @scc_def_and_use_dependency(i32 addrspace(3)* %ptr.0) nounwind {
21 %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
22 %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
23 ret void
24 }
25...
26---
# Case 1: an S_ADD_U32 / S_ADDC_U32 pair that defines and then uses $scc sits
# between two DS_READ_B32 instructions with the same base (%1) and offsets 0
# and 64. Both adds read only %10; neither reads %2, the result of the first
# read. The CHECK lines for this function expect both adds to appear before
# the merged DS_READ2_B32.
27name: scc_def_and_use_no_dependency
28alignment: 0
29exposesReturnsTwice: false
30legalized: false
31regBankSelected: false
32selected: false
33tracksRegLiveness: false
34liveins:
35 - { reg: '$vgpr0' }
36 - { reg: '$sgpr0' }
37frameInfo:
38 isFrameAddressTaken: false
39 isReturnAddressTaken: false
40 hasStackMap: false
41 hasPatchPoint: false
42 stackSize: 0
43 offsetAdjustment: 0
44 maxAlignment: 0
45 adjustsStack: false
46 hasCalls: false
47 maxCallFrameSize: 0
48 hasOpaqueSPAdjustment: false
49 hasVAStart: false
50 hasMustTailInVarArgFunc: false
51body: |
52 bb.0:
53 liveins: $vgpr0, $sgpr0
54
 ; %1 is the address operand of both reads; %10 is the scalar input to the adds.
55 %1:vgpr_32 = COPY $vgpr0
56 %10:sgpr_32 = COPY $sgpr0
57
58 $m0 = S_MOV_B32 -1
 ; First read: base %1, offset 0.
59 %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0)
60
 ; S_ADD_U32 defines $scc and S_ADDC_U32 consumes it; both take %10 as their
 ; only register source.
61 %11:sgpr_32 = S_ADD_U32 %10, 4, implicit-def $scc
62 %12:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc
63
 ; Second read: same base %1, offset 64.
64 %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64)
65 S_ENDPGM
66
67...
68
69# CHECK-LABEL: name: scc_def_and_use_dependency
70# CHECK: DS_READ2_B32
71# CHECK: S_ADD_U32
72# CHECK: S_ADDC_U32
73---
# Case 2: same two DS_READ_B32 instructions (base %1, offsets 0 and 64), but
# now a chain leads from the first read to the S_ADDC_U32:
#   %2 (first read) -> %20 (V_READFIRSTLANE_B32) -> S_ADD_U32 defines $scc
#   -> S_ADDC_U32 uses $scc.
# The last link exists only through the physical register $scc; S_ADDC_U32's
# virtual-register source is %10. The CHECK lines for this function expect the
# merged DS_READ2_B32 to appear before both adds.
# NOTE(review): presumably the pass sinks everything that depends on the first
# read below the merged instruction - confirm against SILoadStoreOptimizer.
74name: scc_def_and_use_dependency
75alignment: 0
76exposesReturnsTwice: false
77legalized: false
78regBankSelected: false
79selected: false
80tracksRegLiveness: false
81liveins:
82 - { reg: '$vgpr0' }
83 - { reg: '$sgpr0' }
84frameInfo:
85 isFrameAddressTaken: false
86 isReturnAddressTaken: false
87 hasStackMap: false
88 hasPatchPoint: false
89 stackSize: 0
90 offsetAdjustment: 0
91 maxAlignment: 0
92 adjustsStack: false
93 hasCalls: false
94 maxCallFrameSize: 0
95 hasOpaqueSPAdjustment: false
96 hasVAStart: false
97 hasMustTailInVarArgFunc: false
98body: |
99 bb.0:
100 liveins: $vgpr0, $sgpr0
101
 ; %1 is the address operand of both reads; %10 feeds only the S_ADDC_U32.
102 %1:vgpr_32 = COPY $vgpr0
103 %10:sgpr_32 = COPY $sgpr0
104
105 $m0 = S_MOV_B32 -1
 ; First read: base %1, offset 0.
106 %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0)
 ; Reads %2, so this instruction depends directly on the first read.
107 %20:sgpr_32 = V_READFIRSTLANE_B32 %2, implicit $exec
108
 ; Reads %20 and defines $scc.
109 %21:sgpr_32 = S_ADD_U32 %20, 4, implicit-def $scc
110 ; The S_ADDC_U32 depends on the first DS_READ_B32 only via SCC
111 %11:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc
112
 ; Second read: same base %1, offset 64.
113 %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64)
114 S_ENDPGM
115
116...