; Source blob: 50df0bc166390be717fe2bea82afb3980e4777e2
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s

; Each kernel in this file performs a single store; the check lines placed
; directly above a kernel pin the store instruction expected in the llc output
; and, for the atomic stores, whether a line ending in 's_waitcnt vmcnt(0)'
; may or must precede it.

; Work-item id intrinsic, used by the *_1 nontemporal kernels to index %out.
declare i32 @llvm.amdgcn.workitem.id.x()

; Atomic store with no explicit syncscope, ordering 'unordered', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}system_unordered
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_unordered(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out unordered, align 4
  ret void
}

; Atomic store with no explicit syncscope, ordering 'monotonic', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}system_monotonic
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_monotonic(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out monotonic, align 4
  ret void
}

; Atomic store with no explicit syncscope, ordering 'release', through an
; addrspace(4) pointer. Expected: a line ending in 's_waitcnt vmcnt(0)'
; directly followed by the flat_store_dword.
; GCN-LABEL: {{^}}system_release
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_release(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out release, align 4
  ret void
}

; Atomic store with no explicit syncscope, ordering 'seq_cst', through an
; addrspace(4) pointer. Expected: a line ending in 's_waitcnt vmcnt(0)'
; directly followed by the flat_store_dword.
; GCN-LABEL: {{^}}system_seq_cst
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_seq_cst(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4
  ret void
}

; Atomic store, syncscope("singlethread"), ordering 'unordered', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}singlethread_unordered
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_unordered(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") unordered, align 4
  ret void
}

; Atomic store, syncscope("singlethread"), ordering 'monotonic', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}singlethread_monotonic
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_monotonic(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") monotonic, align 4
  ret void
}

; Atomic store, syncscope("singlethread"), ordering 'release', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}singlethread_release
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_release(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") release, align 4
  ret void
}

; Atomic store, syncscope("singlethread"), ordering 'seq_cst', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}singlethread_seq_cst
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_seq_cst(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") seq_cst, align 4
  ret void
}

; Atomic store, syncscope("agent"), ordering 'unordered', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}agent_unordered
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_unordered(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") unordered, align 4
  ret void
}

; Atomic store, syncscope("agent"), ordering 'monotonic', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}agent_monotonic
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_monotonic(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") monotonic, align 4
  ret void
}

; Atomic store, syncscope("agent"), ordering 'release', through an
; addrspace(4) pointer. Expected: a line ending in 's_waitcnt vmcnt(0)'
; directly followed by the flat_store_dword.
; GCN-LABEL: {{^}}agent_release
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_release(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") release, align 4
  ret void
}

; Atomic store, syncscope("agent"), ordering 'seq_cst', through an
; addrspace(4) pointer. Expected: a line ending in 's_waitcnt vmcnt(0)'
; directly followed by the flat_store_dword.
; GCN-LABEL: {{^}}agent_seq_cst
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_seq_cst(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") seq_cst, align 4
  ret void
}

; Atomic store, syncscope("workgroup"), ordering 'unordered', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}workgroup_unordered
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_unordered(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") unordered, align 4
  ret void
}

; Atomic store, syncscope("workgroup"), ordering 'monotonic', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}workgroup_monotonic
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_monotonic(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") monotonic, align 4
  ret void
}

; Atomic store, syncscope("workgroup"), ordering 'release', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}workgroup_release
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_release(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") release, align 4
  ret void
}

; Atomic store, syncscope("workgroup"), ordering 'seq_cst', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}workgroup_seq_cst
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_seq_cst(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") seq_cst, align 4
  ret void
}

; Atomic store, syncscope("wavefront"), ordering 'unordered', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}wavefront_unordered
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_unordered(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") unordered, align 4
  ret void
}

; Atomic store, syncscope("wavefront"), ordering 'monotonic', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}wavefront_monotonic
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_monotonic(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") monotonic, align 4
  ret void
}

; Atomic store, syncscope("wavefront"), ordering 'release', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}wavefront_release
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_release(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") release, align 4
  ret void
}

; Atomic store, syncscope("wavefront"), ordering 'seq_cst', through an
; addrspace(4) pointer. Expected: a flat_store_dword with no line ending in
; 's_waitcnt vmcnt(0)' between the kernel label and that store.
; GCN-LABEL: {{^}}wavefront_seq_cst
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_seq_cst(
    i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") seq_cst, align 4
  ret void
}

; Loads an i32 through %in and stores it, tagged !nontemporal, through the
; plain i32* pointer %out. Expected: a buffer_store_dword ending in
; 'offen glc slc'.
; GCN-LABEL: {{^}}nontemporal_private_0
; GCN:       buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_0(
    i32 addrspace(4)* %in, i32* %out) {
entry:
  %val = load i32, i32 addrspace(4)* %in, align 4
  store i32 %val, i32* %out, !nontemporal !0
  ret void
}

; Same as the _0 variant, except the destination is %out indexed by the
; work-item id. Expected: a buffer_store_dword ending in 'offen glc slc'.
; GCN-LABEL: {{^}}nontemporal_private_1
; GCN:       buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_1(
    i32 addrspace(4)* %in, i32* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val = load i32, i32 addrspace(4)* %in, align 4
  %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid
  store i32 %val, i32* %out.gep, !nontemporal !0
  ret void
}

; Loads an i32 through %in and stores it, tagged !nontemporal, through the
; addrspace(1) pointer %out. Expected on the gfx803 runs: flat_store_dword
; ending in 'glc slc'; on the gfx900 runs: global_store_dword ending in
; 'off glc slc'.
; GCN-LABEL: {{^}}nontemporal_global_0
; GFX8:      flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9:      global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_0(
    i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load i32, i32 addrspace(4)* %in, align 4
  store i32 %val, i32 addrspace(1)* %out, !nontemporal !0
  ret void
}

; Same as the _0 variant, except the addrspace(1) destination is %out indexed
; by the work-item id. Expected on the gfx803 runs: flat_store_dword ending in
; 'glc slc'; on the gfx900 runs: global_store_dword ending in 'off glc slc'.
; GCN-LABEL: {{^}}nontemporal_global_1
; GFX8:      flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9:      global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_1(
    i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val = load i32, i32 addrspace(4)* %in, align 4
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0
  ret void
}

; Loads an i32 through %in and stores it, tagged !nontemporal, through the
; addrspace(3) pointer %out. Expected: a ds_write_b32 with nothing after its
; two register operands.
; GCN-LABEL: {{^}}nontemporal_local_0
; GCN:       ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_0(
    i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
entry:
  %val = load i32, i32 addrspace(4)* %in, align 4
  store i32 %val, i32 addrspace(3)* %out, !nontemporal !0
  ret void
}

; Same as the _0 variant, except the addrspace(3) destination is %out indexed
; by the work-item id. Expected: a ds_write_b32 with nothing after its two
; register operands.
; GCN-LABEL: {{^}}nontemporal_local_1
; GCN:       ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_1(
    i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val = load i32, i32 addrspace(4)* %in, align 4
  %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid
  store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0
  ret void
}

; Loads an i32 through %in and stores it, tagged !nontemporal, through the
; addrspace(4) pointer %out. Expected: a flat_store_dword ending in 'glc slc'.
; GCN-LABEL: {{^}}nontemporal_flat_0
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_0(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load i32, i32 addrspace(4)* %in, align 4
  store i32 %val, i32 addrspace(4)* %out, !nontemporal !0
  ret void
}

; Same as the _0 variant, except the addrspace(4) destination is %out indexed
; by the work-item id. Expected: a flat_store_dword ending in 'glc slc'.
; GCN-LABEL: {{^}}nontemporal_flat_1
; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_1(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val = load i32, i32 addrspace(4)* %in, align 4
  %out.gep = getelementptr inbounds i32, i32 addrspace(4)* %out, i32 %tid
  store i32 %val, i32 addrspace(4)* %out.gep, !nontemporal !0
  ret void
}

; Metadata node referenced by the '!nontemporal !0' attachments on the stores
; in the nontemporal_* kernels.
!0 = !{i32 1}