
/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because they do not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

/* On ARM, this can be compiled into either ARM or Thumb code, so as
   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
   does on any other platform. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

/* Number of increments each process (parent and child) applies to each
   of the shared counters. */
#define NNN 3456987

/* True iff the low three bits of _ptr, viewed as an unsigned long,
   are all zero, i.e. the address is a multiple of 8. */
#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))


/* Atomically add n to the 8-bit quantity at p, using hand-written
   inline assembly for the architecture selected by the VGA_* macro
   (exactly one must be defined by the build, otherwise compilation
   stops with "Unsupported arch").

   p  address of the byte to update.  Several variants operate on the
      whole 32- or 64-bit word starting at p and therefore need p to be
      4- or 8-aligned; main() asserts 8-alignment before calling.
   n  addend; only its effect on the byte at p is relevant.

   The function is noinline so that each call executes the asm sequence
   as a separate out-of-line function.

   NOTE(review): the ppc64le variant adds the unshifted n to the whole
   doubleword at p, so a carry out of the low byte appears to propagate
   into the neighbouring bytes; main() only inspects the byte at p --
   confirm that this is intended. */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* Pass p and n via a small memory block addressed through %esi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Pass p and n via a small memory block addressed through %rsi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
   /* n is pre-shifted into the top byte of the 32-bit word; the loop
      retries until the status bit extracted from CR after stwcx. is 1. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* n is pre-shifted into the top byte of the 64-bit doubleword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Little-endian: n is added unshifted to the doubleword at p. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, status }.  The asm stores the strexb status into
      block[2]; retry until it is 0. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* block = { p, n, status }.  The asm stores the stxrb status into
      block[2]; retry until it is 0. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrb w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrb w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Builds the updated word in the scratch variable 'dummy' (icm/stcm
      with mask 1 touch a single byte of it) and installs it with a
      compare-and-swap, looping back to label 0 while cs fails. */
   int dummy;
   __asm__ __volatile__(
      " l 0,%0\n\t"
      "0: st 0,%1\n\t"
      " icm 1,1,%1\n\t"
      " ar 1,%2\n\t"
      " stcm 1,1,%1\n\t"
      " l 1,%1\n\t"
      " cs 0,1,%0\n\t"
      " jl 0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* block = { p, n, status }.  The value left in $t3 by sc is stored
      into block[2]; retry until it is 1. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $t4, 0xFF"         "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: n is pre-shifted into the top byte of the word. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t"  // p
         "lw   $t2, 4($t0)"      "\n\t"  // n
         "ll   $t3, 0($t1)"      "\n\t"
         "addu $t3, $t3, $t2"    "\n\t"
         "sc   $t3, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* Same masking scheme as the mips32 little-endian variant, with
      64-bit loads of p and n from the block. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $s0, 0xFF"         "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: n is pre-shifted into the top byte of the doubleword. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"          "\n\t"
         "ld    $t1, 0($t0)"      "\n\t"  // p
         "ld    $t2, 8($t0)"      "\n\t"  // n
         "lld   $t3, 0($t1)"      "\n\t"
         "daddu $t3, $t3, $t2"    "\n\t"
         "scd   $t3, 0($t1)"      "\n\t"
         "sd    $t3, 16($t0)"     "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}


/* Atomically add n to the 16-bit quantity at p, using hand-written
   inline assembly for the architecture selected by the VGA_* macro
   (exactly one must be defined by the build, otherwise compilation
   stops with "Unsupported arch").

   p  address of the halfword to update.  Several variants operate on
      the whole 32- or 64-bit word starting at p and therefore need p
      to be 4- or 8-aligned; main() asserts 8-alignment before calling.
   n  addend; only its effect on the halfword at p is relevant.

   NOTE(review): the ppc64le variant adds the unshifted n to the whole
   doubleword at p, so a carry out of the low halfword appears to
   propagate into the neighbouring bytes; main() only inspects the
   halfword at p -- confirm that this is intended. */
__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   /* Pass p and n via a small memory block addressed through %esi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Pass p and n via a small memory block addressed through %rsi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
   /* n is pre-shifted into the top halfword of the 32-bit word; the
      loop retries until the status bit extracted from CR is 1. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* n is pre-shifted into the top halfword of the doubleword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Little-endian: n is added unshifted to the doubleword at p. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, status }.  The asm stores the strexh status into
      block[2]; retry until it is 0. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* block = { p, n, status }.  The asm stores the stxrh status into
      block[2]; retry until it is 0. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrh w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrh w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Builds the updated word in the scratch variable 'dummy' (icm/stcm
      with mask 3 touch two bytes of it) and installs it with a
      compare-and-swap, looping back to label 0 while cs fails. */
   int dummy;
   __asm__ __volatile__(
      " l 0,%0\n\t"
      "0: st 0,%1\n\t"
      " icm 1,3,%1\n\t"
      " ar 1,%2\n\t"
      " stcm 1,3,%1\n\t"
      " l 1,%1\n\t"
      " cs 0,1,%0\n\t"
      " jl 0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* block = { p, n, status }.  The value left in $t3 by sc is stored
      into block[2]; retry until it is 1. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $t4, 0xFFFF"       "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: n is pre-shifted into the top halfword of the word. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t"  // p
         "lw   $t2, 4($t0)"      "\n\t"  // n
         "ll   $t3, 0($t1)"      "\n\t"
         "addu $t3, $t3, $t2"    "\n\t"
         "sc   $t3, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* Same masking scheme as the mips32 little-endian variant, with
      64-bit loads of p and n from the block. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $s0, 0xFFFF"       "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0= 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: n is pre-shifted into the top halfword of the
      doubleword. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"          "\n\t"
         "ld    $t1, 0($t0)"      "\n\t"  // p
         "ld    $t2, 8($t0)"      "\n\t"  // n
         "lld   $t3, 0($t1)"      "\n\t"
         "daddu $t3, $t3, $t2"    "\n\t"
         "scd   $t3, 0($t1)"      "\n\t"
         "sd    $t3, 16($t0)"     "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}

/* Atomically add n to the 32-bit quantity at p, using hand-written
   inline assembly for the architecture selected by the VGA_* macro
   (exactly one must be defined by the build, otherwise compilation
   stops with "Unsupported arch").

   p  address of the int to update.  The ppc64 variants operate on the
      whole 64-bit doubleword starting at p and therefore need p to be
      8-aligned; main() asserts 8-alignment before calling.
   n  addend.

   NOTE(review): the ppc64le variant adds the unshifted n to the whole
   doubleword at p, so a carry out of the low word appears to propagate
   into the following word; main() only inspects the word at p --
   confirm that this is intended. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   /* Pass p and n via a small memory block addressed through %esi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Pass p and n via a small memory block addressed through %rsi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Word-sized lwarx/stwcx. loop; retries until the status bit
      extracted from CR is 1. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* n is pre-shifted into the top word of the doubleword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Little-endian: n is added unshifted to the doubleword at p. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, status }.  The asm stores the strex status into
      block[2]; retry until it is 0. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* block = { p, n, status }.  The asm stores the stxr status into
      block[2]; retry until it is 0. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxr  w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxr  w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Compare-and-swap loop on the word at p: jumps back to label 0
      while cs fails. */
   __asm__ __volatile__(
      " l 0,%0\n\t"
      "0: lr 1,0\n\t"
      " ar 1,%1\n\t"
      " cs 0,1,%0\n\t"
      " jl 0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* block = { p, n, status }.  The value left in $t3 by sc is stored
      into block[2]; retry until it is 1. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_mips64)
   /* As for mips32, but p and n are loaded from the block with 64-bit
      loads and the status is stored with sd. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "ll    $t3, 0($t1)"    "\n\t"
         "addu  $t3, $t3, $t2"  "\n\t"
         "sc    $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}

/* Atomically add n to the 64-bit quantity at p, using hand-written
   inline assembly for the architecture selected by the VGA_* macro
   (exactly one must be defined by the build, otherwise compilation
   stops with "Unsupported arch").

   On x86, ppc32 and mips32 this function deliberately does nothing,
   so *p keeps its previous value there; main() accepts a final value
   of 0 for that reason.

   p  address of the 64-bit value to update (main() asserts that it is
      8-aligned).
   n  addend. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* Doubleword ldarx/stdcx. loop; retries until the status bit
      extracted from CR is 1. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, status }, each 64 bits wide.  The asm stores the
      32-bit strexd status with 'str' at byte offset 16, i.e. into one
      half of block[2], leaving the other half at its initial all-ones
      value; the loop therefore waits for 0xFFFFFFFF00000000. */
   unsigned long long int block[3]
     = { (unsigned long long int)(unsigned long)p,
         (unsigned long long int)n,
         0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8,     [r5, #0]"   "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t"
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_arm64)
   /* block = { p, n, status }.  The asm stores the stxr status into
      block[2]; retry until it is 0. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxr  x8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxr  w4, x8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* 64-bit compare-and-swap loop on *p: jumps back to label 0 while
      csg fails. */
   __asm__ __volatile__(
      " lg 0,%0\n\t"
      "0: lgr 1,0\n\t"
      " agr 1,%1\n\t"
      " csg 0,1,%0\n\t"
      " jl 0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips64)
   /* block = { p, n, status }.  The value left in $t3 by scd is stored
      into block[2]; retry until it is 1. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}

713int main ( int argc, char** argv )
714{
715 int i, status;
716 char* page;
717 char* p8;
718 short* p16;
719 int* p32;
720 long long int* p64;
721 pid_t child, p2;
722
723 printf("parent, pre-fork\n");
724
725 page = mmap( 0, sysconf(_SC_PAGESIZE),
726 PROT_READ|PROT_WRITE,
727 MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
728 if (page == MAP_FAILED) {
729 perror("mmap failed");
730 exit(1);
731 }
732
733 p8 = (char*)(page+0);
734 p16 = (short*)(page+256);
735 p32 = (int*)(page+512);
736 p64 = (long long int*)(page+768);
737
sewardj571dfb02009-07-04 14:33:53 +0000738 assert( IS_8_ALIGNED(p8) );
739 assert( IS_8_ALIGNED(p16) );
740 assert( IS_8_ALIGNED(p32) );
741 assert( IS_8_ALIGNED(p64) );
742
743 memset(page, 0, 1024);
744
sewardjac258d82009-07-04 12:44:08 +0000745 *p8 = 0;
746 *p16 = 0;
747 *p32 = 0;
748 *p64 = 0;
749
750 child = fork();
751 if (child == -1) {
752 perror("fork() failed\n");
753 return 1;
754 }
755
756 if (child == 0) {
757 /* --- CHILD --- */
758 printf("child\n");
759 for (i = 0; i < NNN; i++) {
760 atomic_add_8bit(p8, 1);
761 atomic_add_16bit(p16, 1);
762 atomic_add_32bit(p32, 1);
763 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
764 }
765 return 1;
766 /* NOTREACHED */
767
768 }
769
770 /* --- PARENT --- */
771
772 printf("parent\n");
773
774 for (i = 0; i < NNN; i++) {
775 atomic_add_8bit(p8, 1);
776 atomic_add_16bit(p16, 1);
777 atomic_add_32bit(p32, 1);
778 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
779 }
780
781 p2 = waitpid(child, &status, 0);
782 assert(p2 == child);
783
784 /* assert that child finished normally */
785 assert(WIFEXITED(status));
786
787 printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n",
sewardj571dfb02009-07-04 14:33:53 +0000788 (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
sewardjac258d82009-07-04 12:44:08 +0000789
sewardj571dfb02009-07-04 14:33:53 +0000790 if (-74 == (int)(*(signed char*)p8)
sewardjac258d82009-07-04 12:44:08 +0000791 && 32694 == (int)(*p16)
792 && 6913974 == *p32
sewardj571dfb02009-07-04 14:33:53 +0000793 && (0LL == *p64 || 682858642110LL == *p64)) {
sewardjac258d82009-07-04 12:44:08 +0000794 printf("PASS\n");
795 } else {
796 printf("FAIL -- see source code for expected values\n");
797 }
798
799 printf("parent exits\n");
800
801 return 0;
802}