blob: da9e0f32d72eeff06db6027c0e01df8fe889048b [file] [log] [blame]
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | FileCheck %s

// Also test serialization of atomic operations here, to avoid duplicating the test.
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl
// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck %s

// The serialization run compiles this file to a PCH and then compiles it
// again with that PCH pre-included; the guard keeps the second pass from
// redefining everything below.
#ifndef ALREADY_INCLUDED
#define ALREADY_INCLUDED

// Pointer-sized integer, taken from the compiler's predefined type macro.
typedef __INTPTR_TYPE__ intptr_t;
// Vector of eight ints, declared with clang's ext_vector_type attribute.
typedef int int8 __attribute__((ext_vector_type(8)));

// Memory orderings accepted by the __opencl_atomic_* builtins used below.
// Each enumerator takes the value of the compiler's predefined __ATOMIC_*
// constant; no "consume" enumerator is declared here.
typedef enum memory_order {
  memory_order_relaxed = __ATOMIC_RELAXED,
  memory_order_acquire = __ATOMIC_ACQUIRE,
  memory_order_release = __ATOMIC_RELEASE,
  memory_order_acq_rel = __ATOMIC_ACQ_REL,
  memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;

// Synchronization scopes accepted by the __opencl_atomic_* builtins used
// below. Each enumerator takes the value of the compiler's predefined
// __OPENCL_MEMORY_SCOPE_* macro.
//
// memory_scope_sub_group is only declared when one of the subgroup extension
// macros is defined. fi1 uses it unconditionally, so this test relies on the
// target defining at least one of them.
typedef enum memory_scope {
  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;

// Program-scope atomic object; atomic_init_foo initializes it.
atomic_int j;

// Atomic loads with a constant memory scope: each scope must appear as the
// expected syncscope on the emitted instruction, and all_svm_devices must
// produce a load with no syncscope annotation.
void fi1(atomic_int *i) {
  // CHECK-LABEL: @fi1
  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}

// Atomic store with a constant work-group scope and seq_cst ordering.
void fi2(atomic_int *i) {
  // CHECK-LABEL: @fi2
  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
}

// Read-modify-write operations at work-group scope. fetch_min/fetch_max must
// select the signed atomicrmw (min/max) for atomic_int and the unsigned one
// (umin/umax) for atomic_uint.
void fi3(atomic_int *i, atomic_uint *ui) {
  // CHECK-LABEL: @fi3
  // CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
  // CHECK: atomicrmw min i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
  // CHECK: atomicrmw max i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
  // CHECK: atomicrmw umin i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
  // CHECK: atomicrmw umax i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}

// Strong compare-exchange at work-group scope. Besides the cmpxchg itself,
// the checks follow the result pair: the success bit drives a branch, and the
// old value is stored on the path that branch guards.
bool fi4(atomic_int *i) {
  // CHECK-LABEL: @fi4(
  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32 addrspace(4)* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
  // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
  // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
  // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
  // CHECK: store i32 [[OLD]]
  int cmp = 0;
  return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
}

// Atomic load whose scope is a run-time value: codegen must switch over the
// scope and emit one load per possible syncscope, with all_svm_devices as the
// default destination.
//
// All SSA values are matched with patterns rather than literal numbers so the
// test does not depend on how the function's temporaries happen to be
// numbered.
void fi5(atomic_int *i, int scope) {
  // CHECK-LABEL: @fi5
  // CHECK: switch i32 %{{.*}}, label %opencl_allsvmdevices [
  // CHECK-NEXT: i32 1, label %opencl_workgroup
  // CHECK-NEXT: i32 2, label %opencl_device
  // CHECK-NEXT: i32 4, label %opencl_subgroup
  // CHECK-NEXT: ]
  // CHECK: opencl_workgroup:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
  // CHECK: br label %atomic.scope.continue
  // CHECK: opencl_device:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
  // CHECK: br label %atomic.scope.continue
  // CHECK: opencl_allsvmdevices:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst, align 4
  // CHECK: br label %atomic.scope.continue
  // CHECK: opencl_subgroup:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst, align 4
  // CHECK: br label %atomic.scope.continue
  // CHECK: atomic.scope.continue:
  int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
}

// Atomic load where both ordering and scope are run-time values: codegen must
// emit an outer switch over the ordering (1 and 2 both map to acquire, 5 to
// seq_cst, everything else to monotonic) and, inside each ordering, a switch
// over the scope leading to one load per ordering/scope combination.
//
// The switch operands are matched with patterns rather than literal SSA
// numbers so the test does not depend on value numbering.
void fi6(atomic_int *i, int order, int scope) {
  // CHECK-LABEL: @fi6
  // CHECK: switch i32 %{{.*}}, label %monotonic [
  // CHECK-NEXT: i32 1, label %acquire
  // CHECK-NEXT: i32 2, label %acquire
  // CHECK-NEXT: i32 5, label %seqcst
  // CHECK-NEXT: ]
  // CHECK: monotonic:
  // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: acquire:
  // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: seqcst:
  // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[MON_WG]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") monotonic
  // CHECK: [[MON_DEV]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") monotonic
  // CHECK: [[MON_ALL]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} monotonic
  // CHECK: [[MON_SUB]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") monotonic
  // CHECK: [[ACQ_WG]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") acquire
  // CHECK: [[ACQ_DEV]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") acquire
  // CHECK: [[ACQ_ALL]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} acquire
  // CHECK: [[ACQ_SUB]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") acquire
  // CHECK: [[SEQ_WG]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
  // CHECK: [[SEQ_DEV]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
  // CHECK: [[SEQ_ALL]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst
  // CHECK: [[SEQ_SUB]]:
  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst
  int x = __opencl_atomic_load(i, order, scope);
}

// Relaxed load of an atomic_float in the global address space: expected to be
// emitted as a monotonic i32 load from addrspace(1).
float ff1(global atomic_float *d) {
  // CHECK-LABEL: @ff1
  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
  return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}

// Release store to an atomic_float: expected to be emitted as an i32 atomic
// store.
void ff2(atomic_float *d) {
  // CHECK-LABEL: @ff2
  // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release
  __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}

// Exchange on an atomic_float: expected to be emitted as an i32 atomicrmw
// xchg.
float ff3(atomic_float *d) {
  // CHECK-LABEL: @ff3
  // CHECK: atomicrmw xchg i32 addrspace(4)* {{.*}} syncscope("workgroup") seq_cst
  return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}

// Initializing an atomic object must lower to an ordinary store: no
// instruction mentioning "atomic" may appear before or after it within the
// function.
// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo()
{
  // CHECK-NOT: atomic
  // CHECK: store
  __opencl_atomic_init(&j, 42);

  // CHECK-NOT: atomic
  // CHECK: }
}

// Compare-exchange with constant success/failure orderings: both orderings
// must be carried onto the cmpxchg, and the weak variant must be emitted as
// "cmpxchg weak".
// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
  // CHECK: cmpxchg i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);

  // CHECK: cmpxchg weak i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
  __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
}

// Compare-exchange where both orderings are run-time values: codegen must
// switch over the success ordering and, inside each success case, over the
// failure ordering, ending in one cmpxchg per checked combination.
//
// In every switch, cases 1 and 2 are expected to share a destination. The
// second case therefore references the label captured by the first instead of
// capturing a new one, so a regression that splits them is detected.
// CHECK-LABEL: @generalFailureOrder
void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
  // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
  // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]

  // CHECK: [[MONOTONIC]]
  // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: ]

  // CHECK: [[ACQUIRE]]
  // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE]]
  // CHECK-NEXT: ]

  // CHECK: [[RELEASE]]
  // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: ]

  // CHECK: [[ACQREL]]
  // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE]]
  // CHECK-NEXT: ]

  // CHECK: [[SEQCST]]
  // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]]
  // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[MONOTONIC_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} monotonic monotonic
  // CHECK: br

  // CHECK: [[ACQUIRE_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} acquire monotonic
  // CHECK: br

  // CHECK: [[ACQUIRE_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} acquire acquire
  // CHECK: br

  // CHECK: [[ACQREL_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} acq_rel monotonic
  // CHECK: br

  // CHECK: [[ACQREL_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} acq_rel acquire
  // CHECK: br

  // CHECK: [[SEQCST_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} seq_cst monotonic
  // CHECK: br

  // CHECK: [[SEQCST_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} seq_cst acquire
  // CHECK: br

  // CHECK: [[SEQCST_SEQCST]]
  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
  // CHECK: br
}

// Load through a pointer to volatile atomic: the emitted atomic load must
// itself be marked volatile. The whole unoptimized instruction sequence is
// pinned, including the parameter spill and the temporary that holds the
// result.
int test_volatile(volatile atomic_int *i) {
  // CHECK-LABEL: @test_volatile
  // CHECK: %[[i_addr:.*]] = alloca i32
  // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
  // CHECK-NEXT: store i32 addrspace(4)* %i, i32 addrspace(4)** %[[i_addr]]
  // CHECK-NEXT: %[[addr:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[i_addr]]
  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32 addrspace(4)* %[[addr]] syncscope("workgroup") seq_cst
  // CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]]
  // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
  // CHECK-NEXT: ret i32 %[[retval]]
  return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
}

#endif // ALREADY_INCLUDED