blob: 78ed24972578ae04517032d2d8d5808b0df2ebc9 [file] [log] [blame]
Nicolai Haehnledd059c12017-11-22 12:25:21 +00001# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s
2
3# Check that SILoadStoreOptimizer honors memory dependencies between moved
4# instructions.
5#
6# The following IR snippet would usually be optimized by the peephole optimizer.
7# However, an equivalent situation can occur with buffer instructions as well.
8
9# CHECK-LABEL: name: mem_dependency
10# CHECK: DS_READ2_B32 %0, 0, 1,
11# CHECK: DS_WRITE_B32 %0, killed %1, 64,
12# CHECK: DS_READ2_B32 %0, 16, 17,
13# CHECK: DS_WRITE_B32 killed %0, %5, 0
14
15--- |
16 define amdgpu_kernel void @mem_dependency(i32 addrspace(3)* %ptr.0) nounwind {
   ; IR counterpart of the MIR function of the same name further down; the
   ; MIR memory operands (%ir.ptr.0, %ir.ptr.4, %ir.ptr.64) refer to the
   ; pointers defined here.
   ; %ptr.4 and %ptr.64 are the i32 elements 1 and 16 past %ptr.0.
17 %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
18 %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
   ; Copy the value at %ptr.0 to %ptr.64, then read %ptr.64 straight back:
   ; the load of %ptr.64 depends on the store just before it.
19 %1 = load i32, i32 addrspace(3)* %ptr.0
20 store i32 %1, i32 addrspace(3)* %ptr.64
21 %2 = load i32, i32 addrspace(3)* %ptr.64
22 %3 = load i32, i32 addrspace(3)* %ptr.4
   ; Store the sum of the values read from %ptr.64 and %ptr.4 to %ptr.0.
23 %4 = add i32 %2, %3
24 store i32 %4, i32 addrspace(3)* %ptr.0
25 ret void
26 }
27...
28---
29name: mem_dependency
30alignment: 0
31exposesReturnsTwice: false
32legalized: false
33regBankSelected: false
34selected: false
35tracksRegLiveness: true
36liveins:
Puyan Lotfi43e94b12018-01-31 22:04:26 +000037 - { reg: '$vgpr0', virtual-reg: '%1' }
Nicolai Haehnledd059c12017-11-22 12:25:21 +000038frameInfo:
39 isFrameAddressTaken: false
40 isReturnAddressTaken: false
41 hasStackMap: false
42 hasPatchPoint: false
43 stackSize: 0
44 offsetAdjustment: 0
45 maxAlignment: 0
46 adjustsStack: false
47 hasCalls: false
48 maxCallFrameSize: 0
49 hasOpaqueSPAdjustment: false
50 hasVAStart: false
51 hasMustTailInVarArgFunc: false
52body: |
53 bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +000054 liveins: $vgpr0
Nicolai Haehnledd059c12017-11-22 12:25:21 +000055
    ; %1 is a copy of the incoming $vgpr0 and is the first operand of every
    ; DS access in this block.
Puyan Lotfi43e94b12018-01-31 22:04:26 +000056 %1:vgpr_32 = COPY $vgpr0
57 $m0 = S_MOV_B32 -1
    ; Load 4 bytes from %ir.ptr.0 (offset 0) and store that value to
    ; %ir.ptr.64 (offset 64).
58 %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0)
59 DS_WRITE_B32 %1, killed %2, 64, 0, implicit $m0, implicit $exec :: (store 4 into %ir.ptr.64)
Nicolai Haehnledd059c12017-11-22 12:25:21 +000060
61 ; Make this load unmergeable, to tempt SILoadStoreOptimizer into merging the
62 ; other two loads.
    ; This DS_READ2_B32 reads 8 bytes starting at %ir.ptr.64, the location the
    ; DS_WRITE_B32 above stores to. The "other two loads" are the DS_READ_B32
    ; at offset 0 above and the DS_READ_B32 at offset 4 below. The CHECK lines
    ; at the top of the file expect the store to offset 64 to remain ahead of
    ; this read in the output.
Puyan Lotfi43e94b12018-01-31 22:04:26 +000063 %6:vreg_64 = DS_READ2_B32 %1, 16, 17, 0, implicit $m0, implicit $exec :: (load 8 from %ir.ptr.64, align 4)
Nicolai Haehnledd059c12017-11-22 12:25:21 +000064 %3:vgpr_32 = COPY %6.sub0
Puyan Lotfi43e94b12018-01-31 22:04:26 +000065 %4:vgpr_32 = DS_READ_B32 %1, 4, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.4)
    ; Add %6.sub0 to the value loaded from %ir.ptr.4 and store the sum to
    ; %ir.ptr.0 (offset 0).
66 %5:vgpr_32 = V_ADD_I32_e32 killed %3, killed %4, implicit-def $vcc, implicit $exec
67 DS_WRITE_B32 killed %1, %5, 0, 0, implicit killed $m0, implicit $exec :: (store 4 into %ir.ptr.0)
Nicolai Haehnledd059c12017-11-22 12:25:21 +000068 S_ENDPGM
69
70...