; Flat atomic loads (every sync scope x ordering) and nontemporal loads.
; Both gfx803 and gfx900 are exercised, each with an empty OS triple component
; and with amdhsa.
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s

; Intrinsic used by the nontemporal_*_1 kernels as a per-call i32 index into
; their input pointer.
declare i32 @llvm.amdgcn.workitem.id.x()

; Atomic i32 load, default (system) scope, unordered, via an addrspace(4)
; pointer. Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)"
; around it and no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}system_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in unordered, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, default (system) scope, monotonic, via an addrspace(4)
; pointer. Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)"
; around it and no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}system_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in monotonic, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, default (system) scope, acquire, via an addrspace(4)
; pointer. Expected: no "s_waitcnt vmcnt(0)" before the load; the load carries
; glc and is immediately followed by "s_waitcnt vmcnt(0)" and
; buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}system_acquire
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_acquire(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in acquire, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, default (system) scope, seq_cst, via an addrspace(4)
; pointer. Expected: "s_waitcnt vmcnt(0)" immediately before the glc load,
; then "s_waitcnt vmcnt(0)" and buffer_wbinvl1_vol immediately after it.
; GCN-LABEL: {{^}}system_seq_cst
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_seq_cst(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("singlethread"), unordered, via an addrspace(4)
; pointer. Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)"
; around it and no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}singlethread_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") unordered, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("singlethread"), monotonic, via an addrspace(4)
; pointer. Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)"
; around it and no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}singlethread_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") monotonic, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("singlethread"), acquire, via an addrspace(4)
; pointer. Expected: same output shape as the relaxed orderings -- a plain
; flat_load_dword (no glc), no "s_waitcnt vmcnt(0)", no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}singlethread_acquire
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") acquire, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("singlethread"), seq_cst, via an addrspace(4)
; pointer. Expected: same output shape as the relaxed orderings -- a plain
; flat_load_dword (no glc), no "s_waitcnt vmcnt(0)", no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}singlethread_seq_cst
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") seq_cst, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("agent"), unordered, via an addrspace(4) pointer.
; Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)" around it and
; no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}agent_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") unordered, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("agent"), monotonic, via an addrspace(4) pointer.
; Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)" around it and
; no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}agent_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") monotonic, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("agent"), acquire, via an addrspace(4) pointer.
; Expected: no "s_waitcnt vmcnt(0)" before the load; the load carries glc and
; is immediately followed by "s_waitcnt vmcnt(0)" and buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}agent_acquire
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_acquire(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") acquire, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("agent"), seq_cst, via an addrspace(4) pointer.
; Expected: "s_waitcnt vmcnt(0)" immediately before the glc load, then
; "s_waitcnt vmcnt(0)" and buffer_wbinvl1_vol immediately after it.
; GCN-LABEL: {{^}}agent_seq_cst
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_seq_cst(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") seq_cst, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("workgroup"), unordered, via an addrspace(4)
; pointer. Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)"
; around it and no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}workgroup_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") unordered, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("workgroup"), monotonic, via an addrspace(4)
; pointer. Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)"
; around it and no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}workgroup_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") monotonic, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("workgroup"), acquire, via an addrspace(4)
; pointer. Expected: same output shape as the relaxed orderings -- a plain
; flat_load_dword (no glc), no "s_waitcnt vmcnt(0)", no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}workgroup_acquire
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_acquire(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") acquire, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("workgroup"), seq_cst, via an addrspace(4)
; pointer. Expected: same output shape as the relaxed orderings -- a plain
; flat_load_dword (no glc), no "s_waitcnt vmcnt(0)", no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}workgroup_seq_cst
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_seq_cst(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") seq_cst, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("wavefront"), unordered, via an addrspace(4)
; pointer. Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)"
; around it and no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}wavefront_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") unordered, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("wavefront"), monotonic, via an addrspace(4)
; pointer. Expected: a plain flat_load_dword with no "s_waitcnt vmcnt(0)"
; around it and no buffer_wbinvl1_vol before the dependent store.
; GCN-LABEL: {{^}}wavefront_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") monotonic, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("wavefront"), acquire, via an addrspace(4)
; pointer. Expected: same output shape as the relaxed orderings -- a plain
; flat_load_dword (no glc), no "s_waitcnt vmcnt(0)", no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}wavefront_acquire
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") acquire, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Atomic i32 load, syncscope("wavefront"), seq_cst, via an addrspace(4)
; pointer. Expected: same output shape as the relaxed orderings -- a plain
; flat_load_dword (no glc), no "s_waitcnt vmcnt(0)", no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}wavefront_seq_cst
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") seq_cst, align 4
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Non-atomic i32 load tagged !nontemporal through an unqualified i32* pointer
; passed as a kernel argument. Expected: a buffer_load_dword in offen form
; carrying both glc and slc.
; GCN-LABEL: {{^}}nontemporal_private_0
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_0(
    i32* %in, i32 addrspace(4)* %out) {
entry:
  %val = load i32, i32* %in, align 4, !nontemporal !0
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Same as nontemporal_private_0, but the address is %in indexed by the
; workitem.id.x intrinsic result. Expected: a buffer_load_dword in offen form
; carrying both glc and slc.
; GCN-LABEL: {{^}}nontemporal_private_1
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_1(
    i32* %in, i32 addrspace(4)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid
  %val = load i32, i32* %val.gep, align 4, !nontemporal !0
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Non-atomic i32 load tagged !nontemporal from an addrspace(1) kernel argument
; with no per-work-item indexing. Expected: a scalar s_load_dword at offset
; 0x0; the pattern ends right after the offset, so no cache-policy operands
; may follow it.
; GCN-LABEL: {{^}}nontemporal_global_0
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}}
define amdgpu_kernel void @nontemporal_global_0(
    i32 addrspace(1)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Non-atomic i32 load tagged !nontemporal from an addrspace(1) pointer indexed
; by the workitem.id.x intrinsic result. Expected on the gfx803 runs: a
; flat_load_dword with glc slc; on the gfx900 runs: a global_load_dword with
; "off" and glc slc.
; GCN-LABEL: {{^}}nontemporal_global_1
; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_1(
    i32 addrspace(1)* %in, i32 addrspace(4)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Non-atomic i32 load tagged !nontemporal from an addrspace(3) kernel
; argument. Expected: a plain ds_read_b32; the pattern ends right after the
; address operand, so no extra operands may follow it.
; GCN-LABEL: {{^}}nontemporal_local_0
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_0(
    i32 addrspace(3)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Same as nontemporal_local_0, but the address is %in indexed by the
; workitem.id.x intrinsic result. Expected: a plain ds_read_b32 with nothing
; after the address operand.
; GCN-LABEL: {{^}}nontemporal_local_1
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_1(
    i32 addrspace(3)* %in, i32 addrspace(4)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid
  %val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Non-atomic i32 load tagged !nontemporal from an addrspace(4) kernel
; argument. Expected: a flat_load_dword carrying both glc and slc.
; GCN-LABEL: {{^}}nontemporal_flat_0
; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_0(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %val = load i32, i32 addrspace(4)* %in, align 4, !nontemporal !0
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Same as nontemporal_flat_0, but the address is %in indexed by the
; workitem.id.x intrinsic result. Expected: a flat_load_dword carrying both
; glc and slc.
; GCN-LABEL: {{^}}nontemporal_flat_1
; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_1(
    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tid
  %val = load i32, i32 addrspace(4)* %val.gep, align 4, !nontemporal !0
  store i32 %val, i32 addrspace(4)* %out
  ret void
}

; Metadata node referenced by every !nontemporal attachment in this file.
!0 = !{i32 1}