blob: d61cefd5c7391f0d75dcd75de44340da36405374 [file] [log] [blame]
Nicolai Haehnledd059c12017-11-22 12:25:21 +00001# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s
2
3# Check that SILoadStoreOptimizer honors memory dependencies between moved
4# instructions.
5#
6# The following IR snippet would usually be optimized by the peephole optimizer.
7# However, an equivalent situation can occur with buffer instructions as well.
8
9# CHECK-LABEL: name: mem_dependency
10# CHECK: DS_READ2_B32 %0, 0, 1,
11# CHECK: DS_WRITE_B32 %0, killed %1, 64,
12# CHECK: DS_READ2_B32 %0, 16, 17,
13# CHECK: DS_WRITE_B32 killed %0, %5, 0
14
15--- |
16 define amdgpu_kernel void @mem_dependency(i32 addrspace(3)* %ptr.0) nounwind { ; IR-level view of the accesses; the MIR body's memory operands refer to these values as %ir.ptr.0 / %ir.ptr.4 / %ir.ptr.64
17 %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1 ; i32 element 1 of %ptr.0, i.e. 4 bytes past the base
18 %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16 ; i32 element 16 of %ptr.0, i.e. 64 bytes past the base
19 %1 = load i32, i32 addrspace(3)* %ptr.0 ; read the base element
20 store i32 %1, i32 addrspace(3)* %ptr.64 ; write that value to %ptr.64
21 %2 = load i32, i32 addrspace(3)* %ptr.64 ; read back the location just written, so this load must stay after the store
22 %3 = load i32, i32 addrspace(3)* %ptr.4 ; load from %ptr.4, an address the store above does not write
23 %4 = add i32 %2, %3 ; combine the two loaded values
24 store i32 %4, i32 addrspace(3)* %ptr.0 ; write the sum back to the base element
25 ret void
26 }
27...
28---
29name: mem_dependency # machine function fed to si-load-store-opt; the FileCheck label directive matches this name
30alignment: 0
31exposesReturnsTwice: false
32legalized: false
33regBankSelected: false
34selected: false
35tracksRegLiveness: true
36liveins: # incoming physical register and the virtual register associated with it
37 - { reg: '%vgpr0', virtual-reg: '%1' }
38frameInfo: # every field below is zero or false: the function declares no stack usage and no calls
39 isFrameAddressTaken: false
40 isReturnAddressTaken: false
41 hasStackMap: false
42 hasPatchPoint: false
43 stackSize: 0
44 offsetAdjustment: 0
45 maxAlignment: 0
46 adjustsStack: false
47 hasCalls: false
48 maxCallFrameSize: 0
49 hasOpaqueSPAdjustment: false
50 hasVAStart: false
51 hasMustTailInVarArgFunc: false
52body: |
53 bb.0: ; single basic block; the whole test is straight-line code
54 liveins: %vgpr0
55
56 %1:vgpr_32 = COPY %vgpr0 ; %1 is the address operand of every DS_* access below
57 %m0 = S_MOV_B32 -1 ; every DS_* instruction below lists %m0 as an implicit use
58 %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit %m0, implicit %exec :: (load 4 from %ir.ptr.0) ; first 4-byte load (offset 0); one of the two loads the pass is expected to pair
59 DS_WRITE_B32 %1, killed %2, 64, 0, implicit %m0, implicit %exec :: (store 4 into %ir.ptr.64) ; stores the loaded value at offset 64; consumes %2, so it depends on the load above
60
61 ; Make this load unmergeable, to tempt SILoadStoreOptimizer into merging the
62 ; other two loads.
63 %6:vreg_64 = DS_READ2_B32 %1, 16, 17, 0, implicit %m0, implicit %exec :: (load 8 from %ir.ptr.64, align 4) ; 8-byte read starting at %ir.ptr.64, the location the store above wrote, so it must stay after that store
64 %3:vgpr_32 = COPY %6.sub0 ; only sub0 of the pair is used; presumably the element at %ir.ptr.64 (%2 in the IR) - confirm
65 %4:vgpr_32 = DS_READ_B32 %1, 4, 0, implicit %m0, implicit %exec :: (load 4 from %ir.ptr.4) ; second 4-byte load (offset 4); the expected output pairs it with the offset-0 load as a DS_READ2_B32 with offsets 0, 1
66 %5:vgpr_32 = V_ADD_I32_e32 killed %3, killed %4, implicit-def %vcc, implicit %exec ; sum of the two loaded values; last use of %3 and %4
67 DS_WRITE_B32 killed %1, %5, 0, 0, implicit killed %m0, implicit %exec :: (store 4 into %ir.ptr.0) ; final store at offset 0; last use of %1 and %m0
68 S_ENDPGM
69
70...