; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
3
; Atomic i32 load with `unordered` ordering and no explicit syncscope (the
; function name labels this the system-scope case); the value is then stored
; through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}system_unordered
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in unordered, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
17
; Atomic i32 load with `monotonic` ordering and no explicit syncscope (the
; function name labels this the system-scope case); the value is then stored
; through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}system_monotonic
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in monotonic, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
31
; Atomic i32 load with `acquire` ordering and no explicit syncscope (the
; function name labels this the system-scope case); the value is then stored
; through %out.
; Expected output: no bare "s_waitcnt vmcnt(0)" line before the load, a
; flat_load_dword carrying the glc modifier, then on the very next two lines
; "s_waitcnt vmcnt(0)" and buffer_wbinvl1_vol, and finally the store of the
; loaded register.
; CHECK-LABEL: {{^}}system_acquire
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in acquire, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
45
; Atomic i32 load with `seq_cst` ordering and no explicit syncscope (the
; function name labels this the system-scope case); the value is then stored
; through %out.
; Expected output: a bare "s_waitcnt vmcnt(0)" line directly before a
; flat_load_dword carrying the glc modifier, then on the very next two lines
; "s_waitcnt vmcnt(0)" and buffer_wbinvl1_vol, and finally the store of the
; loaded register.
; CHECK-LABEL: {{^}}system_seq_cst
; CHECK: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
59
; Atomic i32 load with syncscope("singlethread") and `unordered` ordering; the
; value is then stored through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}singlethread_unordered
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") unordered, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
73
; Atomic i32 load with syncscope("singlethread") and `monotonic` ordering; the
; value is then stored through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}singlethread_monotonic
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") monotonic, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
87
; Atomic i32 load with syncscope("singlethread") and `acquire` ordering; the
; value is then stored through %out.
; Expected output matches the unordered/monotonic cases in this file: a
; flat_load_dword with nothing after its address operand (no glc), no bare
; "s_waitcnt vmcnt(0)" line before the load or between the load and the store,
; and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}singlethread_acquire
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") acquire, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
101
; Atomic i32 load with syncscope("singlethread") and `seq_cst` ordering; the
; value is then stored through %out.
; Expected output matches the unordered/monotonic cases in this file: a
; flat_load_dword with nothing after its address operand (no glc), no bare
; "s_waitcnt vmcnt(0)" line before the load or between the load and the store,
; and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}singlethread_seq_cst
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") seq_cst, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
115
; Atomic i32 load with syncscope("agent") and `unordered` ordering; the value
; is then stored through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}agent_unordered
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("agent") unordered, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
129
; Atomic i32 load with syncscope("agent") and `monotonic` ordering; the value
; is then stored through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}agent_monotonic
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("agent") monotonic, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
143
; Atomic i32 load with syncscope("agent") and `acquire` ordering; the value is
; then stored through %out.
; Expected output: no bare "s_waitcnt vmcnt(0)" line before the load, a
; flat_load_dword carrying the glc modifier, then on the very next two lines
; "s_waitcnt vmcnt(0)" and buffer_wbinvl1_vol, and finally the store of the
; loaded register.
; CHECK-LABEL: {{^}}agent_acquire
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("agent") acquire, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
157
; Atomic i32 load with syncscope("agent") and `seq_cst` ordering; the value is
; then stored through %out.
; Expected output: a bare "s_waitcnt vmcnt(0)" line directly before a
; flat_load_dword carrying the glc modifier, then on the very next two lines
; "s_waitcnt vmcnt(0)" and buffer_wbinvl1_vol, and finally the store of the
; loaded register.
; CHECK-LABEL: {{^}}agent_seq_cst
; CHECK: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("agent") seq_cst, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
171
; Atomic i32 load with syncscope("workgroup") and `unordered` ordering; the
; value is then stored through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}workgroup_unordered
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") unordered, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
185
; Atomic i32 load with syncscope("workgroup") and `monotonic` ordering; the
; value is then stored through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}workgroup_monotonic
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") monotonic, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
199
; Atomic i32 load with syncscope("workgroup") and `acquire` ordering; the value
; is then stored through %out.
; Expected output matches the unordered/monotonic cases in this file: a
; flat_load_dword with nothing after its address operand (no glc), no bare
; "s_waitcnt vmcnt(0)" line before the load or between the load and the store,
; and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}workgroup_acquire
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") acquire, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
213
; Atomic i32 load with syncscope("workgroup") and `seq_cst` ordering; the value
; is then stored through %out.
; Expected output matches the unordered/monotonic cases in this file: a
; flat_load_dword with nothing after its address operand (no glc), no bare
; "s_waitcnt vmcnt(0)" line before the load or between the load and the store,
; and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}workgroup_seq_cst
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") seq_cst, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
227
; Atomic i32 load with syncscope("wavefront") and `unordered` ordering; the
; value is then stored through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}wavefront_unordered
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") unordered, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
241
; Atomic i32 load with syncscope("wavefront") and `monotonic` ordering; the
; value is then stored through %out.
; Expected output: a flat_load_dword with nothing after its address operand
; (no glc), no bare "s_waitcnt vmcnt(0)" line before the load or between the
; load and the store, and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}wavefront_monotonic
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") monotonic, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
255
; Atomic i32 load with syncscope("wavefront") and `acquire` ordering; the value
; is then stored through %out.
; Expected output matches the unordered/monotonic cases in this file: a
; flat_load_dword with nothing after its address operand (no glc), no bare
; "s_waitcnt vmcnt(0)" line before the load or between the load and the store,
; and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}wavefront_acquire
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") acquire, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}
269
; Atomic i32 load with syncscope("wavefront") and `seq_cst` ordering; the value
; is then stored through %out.
; Expected output matches the unordered/monotonic cases in this file: a
; flat_load_dword with nothing after its address operand (no glc), no bare
; "s_waitcnt vmcnt(0)" line before the load or between the load and the store,
; and no buffer_wbinvl1_vol between the load and the store.
; CHECK-LABEL: {{^}}wavefront_seq_cst
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") seq_cst, align 4
  store i32 %loaded, i32 addrspace(4)* %out
  ret void
}