; Codegen test for the llvm.amdgcn.raw.buffer.store.* intrinsics on amdgcn.
; The verde invocation is verified only against the VERDE prefix; the tonga
; invocation is verified against FileCheck's default prefix.
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=VERDE %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s

; Stores three <4 x float> arguments with constant-zero offsets. The final i32
; operand is 0, 1 and 2 in turn; the expected instructions carry no modifier,
; glc, and slc respectively, and no s_waitcnt may precede the first store.
;CHECK-LABEL: {{^}}buffer_store:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
;CHECK: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
;CHECK: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 2)
  ret void
}

; Passes the constant 42 as the third operand of the store. The expected
; instruction encodes it as an immediate (offset:42) and uses `off` in the
; address slot.
;CHECK-LABEL: {{^}}buffer_store_immoffs:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 42, i32 0, i32 0)
  ret void
}

; Passes a non-constant i32 argument as the third operand of the store. The
; expected instruction takes it from v4 and carries the offen modifier.
;CHECK-LABEL: {{^}}buffer_store_ofs:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0)
  ret void
}

; Store, then load through the same resource, then store the loaded value.
; The expected output reuses v[0:3] for the loaded data, has an
; s_waitcnt vmcnt(0) between the load and the second store, and (verde
; invocation only) an s_waitcnt expcnt(0).
;
; Ideally, the register allocator would avoid the wait here
;
;CHECK-LABEL: {{^}}buffer_store_wait:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
;VERDE: s_waitcnt expcnt(0)
;CHECK: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 offen
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0)
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 %3, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i32 0)
  ret void
}

; Stores a single float at a non-constant offset. The expected instruction is
; a one-dword store with the offen modifier.
;CHECK-LABEL: {{^}}buffer_store_x1:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dword v0, v1, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %offset) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.f32(float %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; Stores a <2 x float> at a non-constant offset. The expected instruction is a
; two-dword store with the offen modifier.
;CHECK-LABEL: {{^}}buffer_store_x2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %offset) #0 {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; Six single-float stores at %a + 4, 8, 12, 16, 28 and 32. The expected output
; combines them into one four-dword store at offset:4 and one two-dword store
; at offset:28, both addressed through v0 with offen. The two stores may be
; emitted in either order.
;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_and:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
define amdgpu_ps void @buffer_store_x1_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
  ret void
}

; Six single-float stores at %a + 4, 8, 12, 16, 28 and 32, where the base %a
; is %inp shifted left by 6. The expected output still combines them into one
; four-dword store at offset:4 and one two-dword store at offset:28; the
; address register is not pinned to a specific VGPR here.
; NOTE(review): presumably the shift makes the low bits of %a known-zero so
; the adds can be rewritten as ors (hence the _or suffix) - confirm.
;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_or:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:28
define amdgpu_ps void @buffer_store_x1_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  %a = shl i32 %inp, 6
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
  ret void
}


; Six single-float stores in three adjacent pairs whose final i32 operand is
; 0, 1 and 3 respectively. The expected output is three separate two-dword
; stores (no modifier, glc, and glc slc), i.e. each store is combined only
; with the neighbour that has the same final operand. The end-of-line anchors
; keep the unmodified pattern from matching a line that carries modifiers.
;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_glc_slc:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}}
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:12 glc{{$}}
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 glc slc{{$}}
define amdgpu_ps void @buffer_store_x1_offen_merged_glc_slc(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 1)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 1)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 3)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 3)
  ret void
}

; Two <2 x float> stores at %a + 4 and %a + 12. The expected output is a
; single four-dword store at offset:4, addressed through v0 with offen.
;CHECK-LABEL: {{^}}buffer_store_x2_offen_merged_and:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
define amdgpu_ps void @buffer_store_x2_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a, <2 x float> %v1, <2 x float> %v2) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  ret void
}

; Two <2 x float> stores at %a + 4 and %a + 12, where the base %a is %inp
; shifted left by 4. The expected output is a single four-dword store at
; offset:4; the address register is not pinned to a specific VGPR here.
; NOTE(review): presumably the shift makes the low bits of %a known-zero so
; the adds can be rewritten as ors (hence the _or suffix) - confirm.
;CHECK-LABEL: {{^}}buffer_store_x2_offen_merged_or:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
define amdgpu_ps void @buffer_store_x2_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp, <2 x float> %v1, <2 x float> %v2) {
  %a = shl i32 %inp, 4
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  ret void
}

; Six single-float stores at constant offsets 4, 8, 12, 16, 28 and 32. The
; expected output combines them into one four-dword store at offset:4 and one
; two-dword store at offset:28, both with `off` in the address slot. The two
; stores may be emitted in either order.
;CHECK-LABEL: {{^}}buffer_store_x1_offset_merged:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
define amdgpu_ps void @buffer_store_x1_offset_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 0)
  ret void
}

; Two <2 x float> stores at constant offsets 4 and 12. The expected output is
; a single four-dword store at offset:4 with `off` in the address slot.
;CHECK-LABEL: {{^}}buffer_store_x2_offset_merged:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
define amdgpu_ps void @buffer_store_x2_offset_merged(<4 x i32> inreg %rsrc, <2 x float> %v1,<2 x float> %v2) {
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
  ret void
}

; Integer-typed variants of the store: <4 x i32>, <2 x i32> and i32 data with
; the final i32 operand set to 0, 1 and 2. The expected instructions are a
; four-, two- and one-dword store carrying no modifier, glc, and slc.
;CHECK-LABEL: {{^}}buffer_store_int:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
;CHECK: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
;CHECK: buffer_store_dword v6, off, s[0:3], 0 slc
define amdgpu_ps void @buffer_store_int(<4 x i32> inreg, <4 x i32>, <2 x i32>, i32) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 %3, <4 x i32> %0, i32 0, i32 0, i32 2)
  ret void
}

; Declarations of the intrinsics called by the tests in this file. The store
; overloads use attribute group #0; the load uses #1.
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }