blob: afb2ef803f014455bef8ad99bb638b47df18b711 [file] [log] [blame]
Tim Renouf4f703f52018-08-21 11:07:10 +00001;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=VERDE %s
2;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
3
4;CHECK-LABEL: {{^}}buffer_store:
5;CHECK-NOT: s_waitcnt
6;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
7;CHECK: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
8;CHECK: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
9define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
10main_body:
11 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
12 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
13 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 2)
14 ret void
15}
16
17;CHECK-LABEL: {{^}}buffer_store_immoffs:
18;CHECK-NOT: s_waitcnt
19;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
20define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
21main_body:
22 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 42, i32 0, i32 0)
23 ret void
24}
25
26;CHECK-LABEL: {{^}}buffer_store_ofs:
27;CHECK-NOT: s_waitcnt
28;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
29define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
30main_body:
31 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0)
32 ret void
33}
34
35; Ideally, the register allocator would avoid the wait here
36;
37;CHECK-LABEL: {{^}}buffer_store_wait:
38;CHECK-NOT: s_waitcnt
39;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
40;VERDE: s_waitcnt expcnt(0)
41;CHECK: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 offen
42;CHECK: s_waitcnt vmcnt(0)
43;CHECK: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 offen
44define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) {
45main_body:
46 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0)
47 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 %3, i32 0, i32 0)
48 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i32 0)
49 ret void
50}
51
52;CHECK-LABEL: {{^}}buffer_store_x1:
53;CHECK-NOT: s_waitcnt
54;CHECK: buffer_store_dword v0, v1, s[0:3], 0 offen
55define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %offset) {
56main_body:
57 call void @llvm.amdgcn.raw.buffer.store.f32(float %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
58 ret void
59}
60
61;CHECK-LABEL: {{^}}buffer_store_x2:
62;CHECK-NOT: s_waitcnt
63;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
64define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %offset) #0 {
65main_body:
66 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
67 ret void
68}
69
70;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged:
71;CHECK-NOT: s_waitcnt
72;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
73;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
74define amdgpu_ps void @buffer_store_x1_offen_merged(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
75 %a1 = add i32 %a, 4
76 %a2 = add i32 %a, 8
77 %a3 = add i32 %a, 12
78 %a4 = add i32 %a, 16
79 %a5 = add i32 %a, 28
80 %a6 = add i32 %a, 32
81 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
82 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
83 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
84 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
85 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
86 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
87 ret void
88}
89
90;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_glc_slc:
91;CHECK-NOT: s_waitcnt
92;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}}
93;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:12 glc{{$}}
94;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 glc slc{{$}}
95define amdgpu_ps void @buffer_store_x1_offen_merged_glc_slc(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
96 %a1 = add i32 %a, 4
97 %a2 = add i32 %a, 8
98 %a3 = add i32 %a, 12
99 %a4 = add i32 %a, 16
100 %a5 = add i32 %a, 28
101 %a6 = add i32 %a, 32
102 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
103 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
104 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 1)
105 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 1)
106 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 3)
107 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 3)
108 ret void
109}
110
111;CHECK-LABEL: {{^}}buffer_store_x2_offen_merged:
112;CHECK-NOT: s_waitcnt
113;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
114define amdgpu_ps void @buffer_store_x2_offen_merged(<4 x i32> inreg %rsrc, i32 %a, <2 x float> %v1, <2 x float> %v2) {
115 %a1 = add i32 %a, 4
116 %a2 = add i32 %a, 12
117 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
118 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
119 ret void
120}
121
122;CHECK-LABEL: {{^}}buffer_store_x1_offset_merged:
123;CHECK-NOT: s_waitcnt
124;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
125;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
126define amdgpu_ps void @buffer_store_x1_offset_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
127 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
128 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 0)
129 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
130 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 0)
131 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 0)
132 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 0)
133 ret void
134}
135
136;CHECK-LABEL: {{^}}buffer_store_x2_offset_merged:
137;CHECK-NOT: s_waitcnt
138;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
139define amdgpu_ps void @buffer_store_x2_offset_merged(<4 x i32> inreg %rsrc, <2 x float> %v1,<2 x float> %v2) {
140 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
141 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
142 ret void
143}
144
Tim Renoufbb5ee412018-08-21 11:08:12 +0000145;CHECK-LABEL: {{^}}buffer_store_int:
146;CHECK-NOT: s_waitcnt
147;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
148;CHECK: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
149;CHECK: buffer_store_dword v6, off, s[0:3], 0 slc
150define amdgpu_ps void @buffer_store_int(<4 x i32> inreg, <4 x i32>, <2 x i32>, i32) {
151main_body:
152 call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
153 call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
154 call void @llvm.amdgcn.raw.buffer.store.i32(i32 %3, <4 x i32> %0, i32 0, i32 0, i32 2)
155 ret void
156}
157
Tim Renouf4f703f52018-08-21 11:07:10 +0000158declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
159declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
160declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
Tim Renoufbb5ee412018-08-21 11:08:12 +0000161declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #0
162declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) #0
163declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) #0
Tim Renouf4f703f52018-08-21 11:07:10 +0000164declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
165
166attributes #0 = { nounwind }
167attributes #1 = { nounwind readonly }