; The same IR is compiled by llc for two AMDGPU targets. The verde invocation
; is matched only against the VERDE prefix; the tonga invocation uses
; FileCheck's default prefix.
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=VERDE %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s

; Three <4 x float> stores with zero index and zero offset. The expected
; output shows the fifth intrinsic operand surfacing as glc and the sixth as
; slc, and no s_waitcnt before the first store.
;CHECK-LABEL: {{^}}buffer_store:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
;CHECK: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
;CHECK: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
main_body:
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
  ret void
}

; A constant fourth operand (42) is expected to be emitted as an immediate
; "offset:42" on the store, with no preceding s_waitcnt.
;CHECK-LABEL: {{^}}buffer_store_immoffs:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
main_body:
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
  ret void
}

; A register value passed as the third operand is expected to select the
; idxen addressing form, using v4 as the address register.
;CHECK-LABEL: {{^}}buffer_store_idx:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen
define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) {
main_body:
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
  ret void
}

; A register value passed as the fourth operand is expected to select the
; offen addressing form, using v4 as the address register.
;CHECK-LABEL: {{^}}buffer_store_ofs:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
main_body:
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 %2, i1 0, i1 0)
  ret void
}

; Register values in both the third and fourth operands are expected to
; produce a single store with "idxen offen" addressed by the pair v[4:5].
;CHECK-LABEL: {{^}}buffer_store_both:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 idxen offen
define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) {
main_body:
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 %3, i1 0, i1 0)
  ret void
}

; The two i32 arguments are passed in swapped order (%3 as third operand, %2
; as fourth). The expected output copies v4 into v6 so that the address pair
; v[5:6] can be used for the "idxen offen" store.
;CHECK-LABEL: {{^}}buffer_store_both_reversed:
;CHECK: v_mov_b32_e32 v6, v4
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 idxen offen
define amdgpu_ps void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) {
main_body:
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %3, i32 %2, i1 0, i1 0)
  ret void
}

; Ideally, the register allocator would avoid the wait here
;
; Sequence under test: store v[0:3], load back into v[0:3], then store the
; loaded data. The verde-only expectation is an expcnt wait between the first
; store and the load; the default-prefix expectations require a vmcnt wait
; between the load and the final store, and no wait before the first store.
;CHECK-LABEL: {{^}}buffer_store_wait:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen
;VERDE: s_waitcnt expcnt(0)
;CHECK: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 idxen
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 idxen
define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) {
main_body:
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 %3, i32 0, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i1 0, i1 0)
  ret void
}

; Scalar float variant of the intrinsic: expected to select a single-dword
; store with idxen addressing.
;CHECK-LABEL: {{^}}buffer_store_x1:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dword v0, v1, s[0:3], 0 idxen
define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %index) {
main_body:
  call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0)
  ret void
}

; <2 x float> variant of the intrinsic: expected to select a two-dword store
; with idxen addressing.
;CHECK-LABEL: {{^}}buffer_store_x2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 idxen
define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %index) #0 {
main_body:
  call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0)
  ret void
}

; Six scalar stores at %a+4, +8, +12, +16 and %a+28, +32. The expectations
; require them to be combined into one four-dword store at offset:4 and one
; two-dword store at offset:28 (in either order), both addressed through v0.
;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
define amdgpu_ps void @buffer_store_x1_offen_merged(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 0, i32 %a1, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 0, i32 %a2, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 0, i32 %a3, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 0, i32 %a4, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 0, i32 %a5, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 0, i32 %a6, i1 0, i1 0)
  ret void
}

; Same six offsets as the plain offen-merge case, but the stores differ in
; their last two i1 operands: the first pair has neither bit set, the second
; pair sets the fifth operand, the third pair sets both. The expectations
; require three separate two-dword stores, one per flag combination, each
; anchored at end of line so the modifiers must match exactly.
;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_glc_slc:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}}
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:12 glc{{$}}
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 glc slc{{$}}
define amdgpu_ps void @buffer_store_x1_offen_merged_glc_slc(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 0, i32 %a1, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 0, i32 %a2, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 0, i32 %a3, i1 1, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 0, i32 %a4, i1 1, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 0, i32 %a5, i1 1, i1 1)
  call void @llvm.amdgcn.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 0, i32 %a6, i1 1, i1 1)
  ret void
}

; Two <2 x float> stores at %a+4 and %a+12 are expected to be combined into
; one four-dword offen store at offset:4.
;CHECK-LABEL: {{^}}buffer_store_x2_offen_merged:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
define amdgpu_ps void @buffer_store_x2_offen_merged(<4 x i32> inreg %rsrc, i32 %a, <2 x float> %v1, <2 x float> %v2) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 0, i32 %a1, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 0, i32 %a2, i1 0, i1 0)
  ret void
}

; Three scalar stores at %a+28, +32 and +36 are expected to be combined into
; one three-dword offen store at offset:28.
;CHECK-LABEL: {{^}}buffer_store_x3_offen_merged:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx3 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
define amdgpu_ps void @buffer_store_x3_offen_merged(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3) {
  %a1 = add i32 %a, 28
  %a2 = add i32 %a, 32
  %a3 = add i32 %a, 36
  call void @llvm.amdgcn.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 0, i32 %a1, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 0, i32 %a2, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 0, i32 %a3, i1 0, i1 0)
  ret void
}

; A <2 x float> store at %a+4 followed by a scalar store at %a+12 is expected
; to be combined into one three-dword offen store at offset:4.
;CHECK-LABEL: {{^}}buffer_store_x3_offen_merged2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx3 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
define amdgpu_ps void @buffer_store_x3_offen_merged2(<4 x i32> inreg %rsrc, i32 %a, <2 x float> %v1, float %v2) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 0, i32 %a1, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 0, i32 %a2, i1 0, i1 0)
  ret void
}

; A scalar store at %a+4 followed by a <2 x float> store at %a+8 is expected
; to be combined into one three-dword offen store at offset:4.
;CHECK-LABEL: {{^}}buffer_store_x3_offen_merged3:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx3 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
define amdgpu_ps void @buffer_store_x3_offen_merged3(<4 x i32> inreg %rsrc, i32 %a, float %v1, <2 x float> %v2) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  call void @llvm.amdgcn.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 0, i32 %a1, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 0, i32 %a2, i1 0, i1 0)
  ret void
}

; Six scalar stores at constant offsets 4, 8, 12, 16 and 28, 32. The
; expectations require one four-dword store at offset:4 and one two-dword
; store at offset:28 (in either order), with no address register ("off").
;CHECK-LABEL: {{^}}buffer_store_x1_offset_merged:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
define amdgpu_ps void @buffer_store_x1_offset_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  call void @llvm.amdgcn.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 0, i32 4, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 0, i32 8, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 0, i32 12, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 0, i32 16, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 0, i32 28, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 0, i32 32, i1 0, i1 0)
  ret void
}

; Two <2 x float> stores at constant offsets 4 and 12 are expected to be
; combined into one four-dword store at offset:4 with no address register.
;CHECK-LABEL: {{^}}buffer_store_x2_offset_merged:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
define amdgpu_ps void @buffer_store_x2_offset_merged(<4 x i32> inreg %rsrc, <2 x float> %v1, <2 x float> %v2) {
  call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 0, i32 4, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 0, i32 12, i1 0, i1 0)
  ret void
}

; Three scalar stores at constant offsets 4, 8 and 12 are expected to be
; combined into one three-dword store at offset:4 with no address register.
;CHECK-LABEL: {{^}}buffer_store_x3_offset_merged:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx3 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
define amdgpu_ps void @buffer_store_x3_offset_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3) {
  call void @llvm.amdgcn.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 0, i32 4, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 0, i32 8, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 0, i32 12, i1 0, i1 0)
  ret void
}

; A scalar store at constant offset 4 followed by a <2 x float> store at
; offset 8 is expected to be combined into one three-dword store at offset:4.
;CHECK-LABEL: {{^}}buffer_store_x3_offset_merged2:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx3 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
define amdgpu_ps void @buffer_store_x3_offset_merged2(<4 x i32> inreg %rsrc, float %v1, <2 x float> %v2) {
  call void @llvm.amdgcn.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 0, i32 4, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 0, i32 8, i1 0, i1 0)
  ret void
}

; A <2 x float> store at constant offset 8 followed by a scalar store at
; offset 16 is expected to be combined into one three-dword store at offset:8.
;CHECK-LABEL: {{^}}buffer_store_x3_offset_merged3:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx3 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:8
define amdgpu_ps void @buffer_store_x3_offset_merged3(<4 x i32> inreg %rsrc, <2 x float> %v1, float %v2) {
  call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 0, i32 8, i1 0, i1 0)
  call void @llvm.amdgcn.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 0, i32 16, i1 0, i1 0)
  ret void
}

; Intrinsic declarations used by the tests in this file. The store variants
; take (data, <4 x i32>, i32, i32, i1, i1); the load variant takes the same
; operands without the data value and returns <4 x float>.
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #0
declare void @llvm.amdgcn.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i1, i1) #0
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #0
declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1

; Attribute group #0 is applied to the stores, #1 (adds readonly) to the load.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }