sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 1 | |
| 2 | /* This is an example of a program which does atomic memory operations |
| 3 | between two processes which share a page. Valgrind 3.4.1 and |
| 4 | earlier produce incorrect answers because they do not preserve |
| 5 | atomicity of the relevant instructions in the generated code; but |
| 6 | the post-DCAS-merge versions of Valgrind do behave correctly. */ |
| 7 | |
sewardj | 9bd30a8 | 2011-07-11 11:46:52 +0000 | [diff] [blame] | 8 | /* On ARM, this can be compiled into either ARM or Thumb code, so as |
| 9 | to test both A and T encodings of LDREX/STREX et al. Also on ARM, |
| 10 | it tests doubleword atomics (LDREXD, STREXD) which I don't think it |
| 11 | does on any other platform. */ |
| 12 | |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 13 | #include <stdlib.h> |
| 14 | #include <stdio.h> |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 15 | #include <string.h> |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 16 | #include <assert.h> |
| 17 | #include <unistd.h> |
| 18 | #include <sys/wait.h> |
njn | 626341c | 2009-07-12 22:58:26 +0000 | [diff] [blame] | 19 | #include "tests/sys_mman.h" |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 20 | |
| 21 | #define NNN 3456987 |
| 22 | |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 23 | #define IS_8_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 7)) |
| 24 | |
| 25 | |
/* atomic_add_8bit: atomically perform *p += n on the byte at p.
   The implementation is hand-written inline assembly selected at compile
   time by the VGA_<arch> macro; an unrecognised architecture is a build
   error.  Several variants operate on the whole aligned word or
   doubleword that starts at p rather than on the single byte, so p must
   be suitably aligned (main asserts that it is 8-aligned).  Nothing is
   returned; the only intended effect is the update of memory at p. */
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 26 | __attribute__((noinline)) void atomic_add_8bit ( char* p, int n ) |
| 27 | { |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 28 | #if defined(VGA_x86) |
/* x86: p and n are handed over in block[], whose address is pinned to
   esi; a lock-prefixed addb then updates the byte in one instruction. */
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 29 | unsigned long block[2]; |
| 30 | block[0] = (unsigned long)p; |
| 31 | block[1] = n; |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 32 | __asm__ __volatile__( |
| 33 | "movl 0(%%esi),%%eax" "\n\t" |
| 34 | "movl 4(%%esi),%%ebx" "\n\t" |
| 35 | "lock; addb %%bl,(%%eax)" "\n" |
| 36 | : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx" |
| 37 | ); |
| 38 | #elif defined(VGA_amd64) |
/* amd64: same scheme as x86, with block[] addressed through rsi. */
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 39 | unsigned long block[2]; |
| 40 | block[0] = (unsigned long)p; |
| 41 | block[1] = n; |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 42 | __asm__ __volatile__( |
| 43 | "movq 0(%%rsi),%%rax" "\n\t" |
| 44 | "movq 8(%%rsi),%%rbx" "\n\t" |
| 45 | "lock; addb %%bl,(%%rax)" "\n" |
| 46 | : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" |
| 47 | ); |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 48 | #elif defined(VGA_ppc32) |
| 49 | /* Nasty hack. Does correctly atomically do *p += n, but only if p |
sewardj | 59570ff | 2010-01-01 11:59:33 +0000 | [diff] [blame] | 50 | is 4-aligned -- guaranteed by caller. */ |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 51 | unsigned long success; |
/* Retry loop around lwarx/stwcx. on the word at p.  n is pre-shifted by
   24 so the word add lands in the top byte of that word.  mfcr, srwi
   and andi. reduce the condition register to a single bit in success,
   and the loop repeats until that bit reads 1. */
| 52 | do { |
| 53 | __asm__ __volatile__( |
| 54 | "lwarx 15,0,%1" "\n\t" |
| 55 | "add 15,15,%2" "\n\t" |
| 56 | "stwcx. 15,0,%1" "\n\t" |
| 57 | "mfcr %0" "\n\t" |
| 58 | "srwi %0,%0,29" "\n\t" |
| 59 | "andi. %0,%0,1" "\n" |
| 60 | : /*out*/"=b"(success) |
| 61 | : /*in*/ "b"(p), "b"(((unsigned long)n) << 24) |
| 62 | : /*trash*/ "memory", "cc", "r15" |
| 63 | ); |
| 64 | } while (success != 1); |
carll | cae0cc2 | 2014-08-07 23:17:29 +0000 | [diff] [blame] | 65 | #elif defined(VGA_ppc64be) |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 66 | /* Nasty hack. Does correctly atomically do *p += n, but only if p |
| 67 | is 8-aligned -- guaranteed by caller. */ |
| 68 | unsigned long success; |
/* Same retry loop using ldarx/stdcx. on the doubleword at p; n is
   pre-shifted by 56 so it is added into the top byte. */
| 69 | do { |
| 70 | __asm__ __volatile__( |
| 71 | "ldarx 15,0,%1" "\n\t" |
| 72 | "add 15,15,%2" "\n\t" |
| 73 | "stdcx. 15,0,%1" "\n\t" |
| 74 | "mfcr %0" "\n\t" |
| 75 | "srwi %0,%0,29" "\n\t" |
| 76 | "andi. %0,%0,1" "\n" |
| 77 | : /*out*/"=b"(success) |
| 78 | : /*in*/ "b"(p), "b"(((unsigned long)n) << 56) |
| 79 | : /*trash*/ "memory", "cc", "r15" |
| 80 | ); |
| 81 | } while (success != 1); |
carll | dd690bf | 2014-08-07 23:49:27 +0000 | [diff] [blame] | 82 | #elif defined(VGA_ppc64le) |
| 83 | /* Nasty hack. Does correctly atomically do *p += n, but only if p |
| 84 | is 8-aligned -- guaranteed by caller. */ |
| 85 | unsigned long success; |
/* Same retry loop, but n is added unshifted to the whole doubleword.
   NOTE(review): a carry out of the low byte would then reach the bytes
   that follow p -- presumably acceptable because main only inspects the
   byte at p; confirm. */
| 86 | do { |
| 87 | __asm__ __volatile__( |
| 88 | "ldarx 15,0,%1" "\n\t" |
| 89 | "add 15,15,%2" "\n\t" |
| 90 | "stdcx. 15,0,%1" "\n\t" |
| 91 | "mfcr %0" "\n\t" |
| 92 | "srwi %0,%0,29" "\n\t" |
| 93 | "andi. %0,%0,1" "\n" |
| 94 | : /*out*/"=b"(success) |
| 95 | : /*in*/ "b"(p), "b"(((unsigned long)n)) |
| 96 | : /*trash*/ "memory", "cc", "r15" |
| 97 | ); |
| 98 | } while (success != 1); |
sewardj | 59570ff | 2010-01-01 11:59:33 +0000 | [diff] [blame] | 99 | #elif defined(VGA_arm) |
/* block[] = { p, n, status }.  strexb writes its status into r4, which
   is stored to block[2]; the loop retries until that status is 0. */
sewardj | 9bd30a8 | 2011-07-11 11:46:52 +0000 | [diff] [blame] | 100 | unsigned int block[3] |
| 101 | = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF }; |
| 102 | do { |
| 103 | __asm__ __volatile__( |
| 104 | "mov r5, %0" "\n\t" |
| 105 | "ldr r9, [r5, #0]" "\n\t" // p |
| 106 | "ldr r10, [r5, #4]" "\n\t" // n |
| 107 | "ldrexb r8, [r9]" "\n\t" |
| 108 | "add r8, r8, r10" "\n\t" |
| 109 | "strexb r4, r8, [r9]" "\n\t" |
| 110 | "str r4, [r5, #8]" "\n\t" |
| 111 | : /*out*/ |
| 112 | : /*in*/ "r"(&block[0]) |
| 113 | : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4" |
| 114 | ); |
| 115 | } while (block[2] != 0); |
sewardj | 14857fd | 2014-02-20 17:34:45 +0000 | [diff] [blame] | 116 | #elif defined(VGA_arm64) |
/* block[] = { p, n, status }.  stxrb writes its status into w4; x4 is
   stored to block[2] and the loop retries until that slot is 0. */
| 117 | unsigned long long int block[3] |
| 118 | = { (unsigned long long int)p, (unsigned long long int)n, |
| 119 | 0xFFFFFFFFFFFFFFFFULL}; |
| 120 | do { |
| 121 | __asm__ __volatile__( |
| 122 | "mov x5, %0" "\n\t" |
| 123 | "ldr x9, [x5, #0]" "\n\t" // p |
| 124 | "ldr x10, [x5, #8]" "\n\t" // n |
| 125 | "ldxrb w8, [x9]" "\n\t" |
| 126 | "add x8, x8, x10" "\n\t" |
| 127 | "stxrb w4, w8, [x9]" "\n\t" |
| 128 | "str x4, [x5, #16]" "\n\t" |
| 129 | : /*out*/ |
| 130 | : /*in*/ "r"(&block[0]) |
| 131 | : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4" |
| 132 | ); |
| 133 | } while (block[2] != 0); |
sewardj | b5b8740 | 2011-03-07 16:05:35 +0000 | [diff] [blame] | 134 | #elif defined(VGA_s390x) |
/* Compare-and-swap loop.  The word currently at p is copied into dummy,
   one byte of that copy is updated (icm / ar / stcm with mask 1), the
   copy is reloaded and cs tries to install it at p; jl 0b loops back to
   retry.  dummy is only scratch storage for the byte insertion. */
| 135 | int dummy; |
| 136 | __asm__ __volatile__( |
| 137 | " l 0,%0\n\t" |
| 138 | "0: st 0,%1\n\t" |
| 139 | " icm 1,1,%1\n\t" |
| 140 | " ar 1,%2\n\t" |
| 141 | " stcm 1,1,%1\n\t" |
| 142 | " l 1,%1\n\t" |
| 143 | " cs 0,1,%0\n\t" |
| 144 | " jl 0b\n\t" |
| 145 | : "+m" (*p), "+m" (dummy) |
| 146 | : "d" (n) |
| 147 | : "cc", "memory", "0", "1"); |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 148 | #elif defined(VGA_mips32) |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 149 | /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an |
| 150 | exception that can cause this function to fail. */ |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 151 | #if defined (_MIPSEL) |
/* Little-endian: block[] = { p, n, result }.  The new low byte is
   computed separately and merged with the untouched upper 24 bits of
   the word before sc.  The value sc leaves in $t3 is stored to block[2]
   and the loop retries until it is 1. */
| 152 | unsigned int block[3] |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 153 | = { (unsigned int)p, (unsigned int)n, 0x0 }; |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 154 | do { |
| 155 | __asm__ __volatile__( |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 156 | "move $t0, %0" "\n\t" |
| 157 | "lw $t1, 0($t0)" "\n\t" // p |
| 158 | "lw $t2, 4($t0)" "\n\t" // n |
| 159 | "andi $t2, $t2, 0xFF" "\n\t" // n = n and 0xFF |
| 160 | "li $t4, 0xFF" "\n\t" |
| 161 | "nor $t4, $t4, $zero" "\n\t" // $t4 = 0xFFFFFF00 |
| 162 | "ll $t3, 0($t1)" "\n\t" // $t3 = old value |
| 163 | "and $t4, $t4, $t3" "\n\t" // $t4 = $t3 and 0xFFFFFF00 |
| 164 | "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n |
| 165 | "andi $t3, $t3, 0xFF" "\n\t" // $t3 = $t3 and 0xFF |
| 166 | "or $t3, $t3, $t4" "\n\t" // $t3 = $t3 or $t4 |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 167 | "sc $t3, 0($t1)" "\n\t" |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 168 | "sw $t3, 8($t0)" "\n\t" // save result |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 169 | : /*out*/ |
| 170 | : /*in*/ "r"(&block[0]) |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 171 | : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4" |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 172 | ); |
| 173 | } while (block[2] != 1); |
| 174 | #elif defined (_MIPSEB) |
/* Big-endian: n is pre-shifted by 24 so a plain ll/addu/sc on the word
   at p adds it into the top byte; same retry-until-1 protocol. */
| 175 | unsigned int block[3] |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 176 | = { (unsigned int)p, (unsigned int)n << 24, 0x0 }; |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 177 | do { |
| 178 | __asm__ __volatile__( |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 179 | "move $t0, %0" "\n\t" |
| 180 | "lw $t1, 0($t0)" "\n\t" // p |
| 181 | "lw $t2, 4($t0)" "\n\t" // n |
| 182 | "ll $t3, 0($t1)" "\n\t" |
| 183 | "addu $t3, $t3, $t2" "\n\t" |
| 184 | "sc $t3, 0($t1)" "\n\t" |
| 185 | "sw $t3, 8($t0)" "\n\t" |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 186 | : /*out*/ |
| 187 | : /*in*/ "r"(&block[0]) |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 188 | : /*trash*/ "memory", "t0", "t1", "t2", "t3" |
| 189 | ); |
| 190 | } while (block[2] != 1); |
| 191 | #endif |
| 192 | #elif defined(VGA_mips64) |
| 193 | /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an |
| 194 | exception that can cause this function to fail. */ |
| 195 | #if defined (_MIPSEL) |
/* Little-endian: same low-byte merge as the mips32 variant, with 64-bit
   block[] slots (loaded with ld) and $s0 holding the mask.  The sc
   result is written with a 32-bit sw into block[2], which starts at 0;
   the loop retries until that slot reads 1. */
| 196 | unsigned long block[3] |
| 197 | = { (unsigned long)p, (unsigned long)n, 0x0ULL }; |
| 198 | do { |
| 199 | __asm__ __volatile__( |
| 200 | "move $t0, %0" "\n\t" |
| 201 | "ld $t1, 0($t0)" "\n\t" // p |
| 202 | "ld $t2, 8($t0)" "\n\t" // n |
| 203 | "andi $t2, $t2, 0xFF" "\n\t" // n = n and 0xFF |
| 204 | "li $s0, 0xFF" "\n\t" |
| 205 | "nor $s0, $s0, $zero" "\n\t" // $s0 = 0xFFFFFF00 |
| 206 | "ll $t3, 0($t1)" "\n\t" // $t3 = old value |
| 207 | "and $s0, $s0, $t3" "\n\t" // $s0 = $t3 and 0xFFFFFF00 |
| 208 | "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n |
| 209 | "andi $t3, $t3, 0xFF" "\n\t" // $t3 = $t3 and 0xFF |
| 210 | "or $t3, $t3, $s0" "\n\t" // $t3 = $t3 or $s0 |
| 211 | "sc $t3, 0($t1)" "\n\t" |
| 212 | "sw $t3, 16($t0)" "\n\t" // save result |
| 213 | : /*out*/ |
| 214 | : /*in*/ "r"(&block[0]) |
| 215 | : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0" |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 216 | ); |
| 217 | } while (block[2] != 1); |
petarj | 767f92e | 2013-05-14 13:33:27 +0000 | [diff] [blame] | 218 | #elif defined (_MIPSEB) |
/* Big-endian: n is pre-shifted by 56 and added to the doubleword at p
   with lld/daddu/scd; the scd result is stored to block[2] and the loop
   retries until it is 1. */
| 219 | unsigned long block[3] |
| 220 | = { (unsigned long)p, (unsigned long)n << 56, 0x0 }; |
| 221 | do { |
| 222 | __asm__ __volatile__( |
| 223 | "move $t0, %0" "\n\t" |
| 224 | "ld $t1, 0($t0)" "\n\t" // p |
| 225 | "ld $t2, 8($t0)" "\n\t" // n |
| 226 | "lld $t3, 0($t1)" "\n\t" |
| 227 | "daddu $t3, $t3, $t2" "\n\t" |
| 228 | "scd $t3, 0($t1)" "\n\t" |
| 229 | "sd $t3, 16($t0)" "\n\t" |
| 230 | : /*out*/ |
| 231 | : /*in*/ "r"(&block[0]) |
| 232 | : /*trash*/ "memory", "t0", "t1", "t2", "t3" |
| 233 | ); |
| 234 | } while (block[2] != 1); |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 235 | #endif |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 236 | #else |
| 237 | # error "Unsupported arch" |
| 238 | #endif |
| 239 | } |
| 240 | |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 241 | |
/* atomic_add_16bit: atomically perform *p += n on the 16-bit value at p.
   The implementation is hand-written inline assembly selected at compile
   time by the VGA_<arch> macro; an unrecognised architecture is a build
   error.  Several variants operate on the whole aligned word or
   doubleword that starts at p rather than on the halfword alone, so p
   must be suitably aligned (main asserts that it is 8-aligned).  Nothing
   is returned; the only intended effect is the update of memory at p. */
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 242 | __attribute__((noinline)) void atomic_add_16bit ( short* p, int n ) |
| 243 | { |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 244 | #if defined(VGA_x86) |
/* x86: p and n are handed over in block[], whose address is pinned to
   esi; a lock-prefixed addw then updates the halfword in one
   instruction. */
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 245 | unsigned long block[2]; |
| 246 | block[0] = (unsigned long)p; |
| 247 | block[1] = n; |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 248 | __asm__ __volatile__( |
| 249 | "movl 0(%%esi),%%eax" "\n\t" |
| 250 | "movl 4(%%esi),%%ebx" "\n\t" |
| 251 | "lock; addw %%bx,(%%eax)" "\n" |
| 252 | : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx" |
| 253 | ); |
| 254 | #elif defined(VGA_amd64) |
/* amd64: same scheme as x86, with block[] addressed through rsi. */
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 255 | unsigned long block[2]; |
| 256 | block[0] = (unsigned long)p; |
| 257 | block[1] = n; |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 258 | __asm__ __volatile__( |
| 259 | "movq 0(%%rsi),%%rax" "\n\t" |
| 260 | "movq 8(%%rsi),%%rbx" "\n\t" |
| 261 | "lock; addw %%bx,(%%rax)" "\n" |
| 262 | : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" |
| 263 | ); |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 264 | #elif defined(VGA_ppc32) |
| 265 | /* Nasty hack. Does correctly atomically do *p += n, but only if p |
| 266 | is 8-aligned -- guaranteed by caller. */ |
| 267 | unsigned long success; |
/* Retry loop around lwarx/stwcx. on the word at p.  n is pre-shifted by
   16 so the word add lands in the upper halfword of that word.  mfcr,
   srwi and andi. reduce the condition register to a single bit in
   success, and the loop repeats until that bit reads 1. */
| 268 | do { |
| 269 | __asm__ __volatile__( |
| 270 | "lwarx 15,0,%1" "\n\t" |
| 271 | "add 15,15,%2" "\n\t" |
| 272 | "stwcx. 15,0,%1" "\n\t" |
| 273 | "mfcr %0" "\n\t" |
| 274 | "srwi %0,%0,29" "\n\t" |
| 275 | "andi. %0,%0,1" "\n" |
| 276 | : /*out*/"=b"(success) |
| 277 | : /*in*/ "b"(p), "b"(((unsigned long)n) << 16) |
| 278 | : /*trash*/ "memory", "cc", "r15" |
| 279 | ); |
| 280 | } while (success != 1); |
carll | cae0cc2 | 2014-08-07 23:17:29 +0000 | [diff] [blame] | 281 | #elif defined(VGA_ppc64be) |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 282 | /* Nasty hack. Does correctly atomically do *p += n, but only if p |
| 283 | is 8-aligned -- guaranteed by caller. */ |
| 284 | unsigned long success; |
/* Same retry loop using ldarx/stdcx. on the doubleword at p; n is
   pre-shifted by 48 so it is added into the top halfword. */
| 285 | do { |
| 286 | __asm__ __volatile__( |
| 287 | "ldarx 15,0,%1" "\n\t" |
| 288 | "add 15,15,%2" "\n\t" |
| 289 | "stdcx. 15,0,%1" "\n\t" |
| 290 | "mfcr %0" "\n\t" |
| 291 | "srwi %0,%0,29" "\n\t" |
| 292 | "andi. %0,%0,1" "\n" |
| 293 | : /*out*/"=b"(success) |
| 294 | : /*in*/ "b"(p), "b"(((unsigned long)n) << 48) |
| 295 | : /*trash*/ "memory", "cc", "r15" |
| 296 | ); |
| 297 | } while (success != 1); |
carll | dd690bf | 2014-08-07 23:49:27 +0000 | [diff] [blame] | 298 | #elif defined(VGA_ppc64le) |
| 299 | /* Nasty hack. Does correctly atomically do *p += n, but only if p |
| 300 | is 8-aligned -- guaranteed by caller. */ |
| 301 | unsigned long success; |
/* Same retry loop, but n is added unshifted to the whole doubleword.
   NOTE(review): a carry out of the low halfword would then reach the
   bytes that follow it -- presumably acceptable because main only
   inspects the halfword at p; confirm. */
| 302 | do { |
| 303 | __asm__ __volatile__( |
| 304 | "ldarx 15,0,%1" "\n\t" |
| 305 | "add 15,15,%2" "\n\t" |
| 306 | "stdcx. 15,0,%1" "\n\t" |
| 307 | "mfcr %0" "\n\t" |
| 308 | "srwi %0,%0,29" "\n\t" |
| 309 | "andi. %0,%0,1" "\n" |
| 310 | : /*out*/"=b"(success) |
| 311 | : /*in*/ "b"(p), "b"(((unsigned long)n)) |
| 312 | : /*trash*/ "memory", "cc", "r15" |
| 313 | ); |
| 314 | } while (success != 1); |
sewardj | 59570ff | 2010-01-01 11:59:33 +0000 | [diff] [blame] | 315 | #elif defined(VGA_arm) |
/* block[] = { p, n, status }.  strexh writes its status into r4, which
   is stored to block[2]; the loop retries until that status is 0. */
sewardj | 9bd30a8 | 2011-07-11 11:46:52 +0000 | [diff] [blame] | 316 | unsigned int block[3] |
| 317 | = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF }; |
| 318 | do { |
| 319 | __asm__ __volatile__( |
| 320 | "mov r5, %0" "\n\t" |
| 321 | "ldr r9, [r5, #0]" "\n\t" // p |
| 322 | "ldr r10, [r5, #4]" "\n\t" // n |
| 323 | "ldrexh r8, [r9]" "\n\t" |
| 324 | "add r8, r8, r10" "\n\t" |
| 325 | "strexh r4, r8, [r9]" "\n\t" |
| 326 | "str r4, [r5, #8]" "\n\t" |
| 327 | : /*out*/ |
| 328 | : /*in*/ "r"(&block[0]) |
| 329 | : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4" |
| 330 | ); |
| 331 | } while (block[2] != 0); |
sewardj | 14857fd | 2014-02-20 17:34:45 +0000 | [diff] [blame] | 332 | #elif defined(VGA_arm64) |
/* block[] = { p, n, status }.  stxrh writes its status into w4; x4 is
   stored to block[2] and the loop retries until that slot is 0. */
| 333 | unsigned long long int block[3] |
| 334 | = { (unsigned long long int)p, (unsigned long long int)n, |
| 335 | 0xFFFFFFFFFFFFFFFFULL}; |
| 336 | do { |
| 337 | __asm__ __volatile__( |
| 338 | "mov x5, %0" "\n\t" |
| 339 | "ldr x9, [x5, #0]" "\n\t" // p |
| 340 | "ldr x10, [x5, #8]" "\n\t" // n |
| 341 | "ldxrh w8, [x9]" "\n\t" |
| 342 | "add x8, x8, x10" "\n\t" |
| 343 | "stxrh w4, w8, [x9]" "\n\t" |
| 344 | "str x4, [x5, #16]" "\n\t" |
| 345 | : /*out*/ |
| 346 | : /*in*/ "r"(&block[0]) |
| 347 | : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4" |
| 348 | ); |
| 349 | } while (block[2] != 0); |
sewardj | b5b8740 | 2011-03-07 16:05:35 +0000 | [diff] [blame] | 350 | #elif defined(VGA_s390x) |
/* Compare-and-swap loop.  The word currently at p is copied into dummy,
   two bytes of that copy are updated (icm / ar / stcm with mask 3), the
   copy is reloaded and cs tries to install it at p; jl 0b loops back to
   retry.  dummy is only scratch storage for the halfword insertion. */
| 351 | int dummy; |
| 352 | __asm__ __volatile__( |
| 353 | " l 0,%0\n\t" |
| 354 | "0: st 0,%1\n\t" |
| 355 | " icm 1,3,%1\n\t" |
| 356 | " ar 1,%2\n\t" |
| 357 | " stcm 1,3,%1\n\t" |
| 358 | " l 1,%1\n\t" |
| 359 | " cs 0,1,%0\n\t" |
| 360 | " jl 0b\n\t" |
| 361 | : "+m" (*p), "+m" (dummy) |
| 362 | : "d" (n) |
| 363 | : "cc", "memory", "0", "1"); |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 364 | #elif defined(VGA_mips32) |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 365 | /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an |
| 366 | exception that can cause this function to fail. */ |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 367 | #if defined (_MIPSEL) |
/* Little-endian: block[] = { p, n, result }.  The new low halfword is
   computed separately and merged with the untouched upper 16 bits of
   the word before sc.  The value sc leaves in $t3 is stored to block[2]
   and the loop retries until it is 1. */
| 368 | unsigned int block[3] |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 369 | = { (unsigned int)p, (unsigned int)n, 0x0 }; |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 370 | do { |
| 371 | __asm__ __volatile__( |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 372 | "move $t0, %0" "\n\t" |
| 373 | "lw $t1, 0($t0)" "\n\t" // p |
| 374 | "lw $t2, 4($t0)" "\n\t" // n |
| 375 | "andi $t2, $t2, 0xFFFF" "\n\t" // n = n and 0xFFFF |
| 376 | "li $t4, 0xFFFF" "\n\t" |
| 377 | "nor $t4, $t4, $zero" "\n\t" // $t4 = 0xFFFF0000 |
| 378 | "ll $t3, 0($t1)" "\n\t" // $t3 = old value |
| 379 | "and $t4, $t4, $t3" "\n\t" // $t4 = $t3 and 0xFFFF0000 |
| 380 | "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n |
| 381 | "andi $t3, $t3, 0xFFFF" "\n\t" // $t3 = $t3 and 0xFFFF |
| 382 | "or $t3, $t3, $t4" "\n\t" // $t3 = $t3 or $t4 |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 383 | "sc $t3, 0($t1)" "\n\t" |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 384 | "sw $t3, 8($t0)" "\n\t" // save result |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 385 | : /*out*/ |
| 386 | : /*in*/ "r"(&block[0]) |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 387 | : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4" |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 388 | ); |
| 389 | } while (block[2] != 1); |
| 390 | #elif defined (_MIPSEB) |
/* Big-endian: n is pre-shifted by 16 so a plain ll/addu/sc on the word
   at p adds it into the upper halfword; same retry-until-1 protocol. */
| 391 | unsigned int block[3] |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 392 | = { (unsigned int)p, (unsigned int)n << 16, 0x0 }; |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 393 | do { |
| 394 | __asm__ __volatile__( |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 395 | "move $t0, %0" "\n\t" |
| 396 | "lw $t1, 0($t0)" "\n\t" // p |
| 397 | "lw $t2, 4($t0)" "\n\t" // n |
| 398 | "ll $t3, 0($t1)" "\n\t" |
| 399 | "addu $t3, $t3, $t2" "\n\t" |
| 400 | "sc $t3, 0($t1)" "\n\t" |
| 401 | "sw $t3, 8($t0)" "\n\t" |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 402 | : /*out*/ |
| 403 | : /*in*/ "r"(&block[0]) |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 404 | : /*trash*/ "memory", "t0", "t1", "t2", "t3" |
| 405 | ); |
| 406 | } while (block[2] != 1); |
| 407 | #endif |
| 408 | #elif defined(VGA_mips64) |
| 409 | /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an |
| 410 | exception that can cause this function to fail. */ |
| 411 | #if defined (_MIPSEL) |
/* Little-endian: same low-halfword merge as the mips32 variant, with
   64-bit block[] slots (loaded with ld) and $s0 holding the mask.  The
   sc result is written with a 32-bit sw into block[2], which starts at
   0; the loop retries until that slot reads 1. */
| 412 | unsigned long block[3] |
| 413 | = { (unsigned long)p, (unsigned long)n, 0x0ULL }; |
| 414 | do { |
| 415 | __asm__ __volatile__( |
| 416 | "move $t0, %0" "\n\t" |
| 417 | "ld $t1, 0($t0)" "\n\t" // p |
| 418 | "ld $t2, 8($t0)" "\n\t" // n |
| 419 | "andi $t2, $t2, 0xFFFF" "\n\t" // n = n and 0xFFFF |
| 420 | "li $s0, 0xFFFF" "\n\t" |
| 421 | "nor $s0, $s0, $zero" "\n\t" // $s0= 0xFFFF0000 |
| 422 | "ll $t3, 0($t1)" "\n\t" // $t3 = old value |
| 423 | "and $s0, $s0, $t3" "\n\t" // $s0 = $t3 and 0xFFFF0000 |
| 424 | "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n |
| 425 | "andi $t3, $t3, 0xFFFF" "\n\t" // $t3 = $t3 and 0xFFFF |
| 426 | "or $t3, $t3, $s0" "\n\t" // $t3 = $t3 or $s0 |
| 427 | "sc $t3, 0($t1)" "\n\t" |
| 428 | "sw $t3, 16($t0)" "\n\t" // save result |
| 429 | : /*out*/ |
| 430 | : /*in*/ "r"(&block[0]) |
| 431 | : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0" |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 432 | ); |
| 433 | } while (block[2] != 1); |
petarj | 767f92e | 2013-05-14 13:33:27 +0000 | [diff] [blame] | 434 | #elif defined (_MIPSEB) |
/* Big-endian: n is pre-shifted by 48 and added to the doubleword at p
   with lld/daddu/scd; the scd result is stored to block[2] and the loop
   retries until it is 1. */
| 435 | unsigned long block[3] |
| 436 | = { (unsigned long)p, (unsigned long)n << 48, 0x0 }; |
| 437 | do { |
| 438 | __asm__ __volatile__( |
| 439 | "move $t0, %0" "\n\t" |
| 440 | "ld $t1, 0($t0)" "\n\t" // p |
| 441 | "ld $t2, 8($t0)" "\n\t" // n |
| 442 | "lld $t3, 0($t1)" "\n\t" |
| 443 | "daddu $t3, $t3, $t2" "\n\t" |
| 444 | "scd $t3, 0($t1)" "\n\t" |
| 445 | "sd $t3, 16($t0)" "\n\t" |
| 446 | : /*out*/ |
| 447 | : /*in*/ "r"(&block[0]) |
| 448 | : /*trash*/ "memory", "t0", "t1", "t2", "t3" |
| 449 | ); |
| 450 | } while (block[2] != 1); |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 451 | #endif |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 452 | #else |
| 453 | # error "Unsupported arch" |
| 454 | #endif |
| 455 | } |
| 456 | |
/* atomic_add_32bit: atomically perform *p += n on the 32-bit value at p.
   The implementation is hand-written inline assembly selected at compile
   time by the VGA_<arch> macro; an unrecognised architecture is a build
   error.  The ppc64 variants operate on the whole doubleword that starts
   at p, so p must be 8-aligned there (main asserts that it is).  Nothing
   is returned; the only intended effect is the update of memory at p. */
| 457 | __attribute__((noinline)) void atomic_add_32bit ( int* p, int n ) |
| 458 | { |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 459 | #if defined(VGA_x86) |
/* x86: p and n are handed over in block[], whose address is pinned to
   esi; a lock-prefixed addl then updates the word in one instruction. */
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 460 | unsigned long block[2]; |
| 461 | block[0] = (unsigned long)p; |
| 462 | block[1] = n; |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 463 | __asm__ __volatile__( |
| 464 | "movl 0(%%esi),%%eax" "\n\t" |
| 465 | "movl 4(%%esi),%%ebx" "\n\t" |
| 466 | "lock; addl %%ebx,(%%eax)" "\n" |
| 467 | : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx" |
| 468 | ); |
| 469 | #elif defined(VGA_amd64) |
/* amd64: same scheme as x86, with block[] addressed through rsi. */
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 470 | unsigned long block[2]; |
| 471 | block[0] = (unsigned long)p; |
| 472 | block[1] = n; |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 473 | __asm__ __volatile__( |
| 474 | "movq 0(%%rsi),%%rax" "\n\t" |
| 475 | "movq 8(%%rsi),%%rbx" "\n\t" |
| 476 | "lock; addl %%ebx,(%%rax)" "\n" |
| 477 | : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" |
| 478 | ); |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 479 | #elif defined(VGA_ppc32) |
/* Retry loop around lwarx/stwcx. on the word at p, with n added as is.
   mfcr, srwi and andi. reduce the condition register to a single bit in
   success, and the loop repeats until that bit reads 1. */
| 480 | unsigned long success; |
| 481 | do { |
| 482 | __asm__ __volatile__( |
| 483 | "lwarx 15,0,%1" "\n\t" |
| 484 | "add 15,15,%2" "\n\t" |
| 485 | "stwcx. 15,0,%1" "\n\t" |
| 486 | "mfcr %0" "\n\t" |
| 487 | "srwi %0,%0,29" "\n\t" |
| 488 | "andi. %0,%0,1" "\n" |
| 489 | : /*out*/"=b"(success) |
| 490 | : /*in*/ "b"(p), "b"(n) |
| 491 | : /*trash*/ "memory", "cc", "r15" |
| 492 | ); |
| 493 | } while (success != 1); |
carll | cae0cc2 | 2014-08-07 23:17:29 +0000 | [diff] [blame] | 494 | #elif defined(VGA_ppc64be) |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 495 | /* Nasty hack. Does correctly atomically do *p += n, but only if p |
| 496 | is 8-aligned -- guaranteed by caller. */ |
| 497 | unsigned long success; |
/* Retry loop using ldarx/stdcx. on the doubleword at p; n is
   pre-shifted by 32 so it is added into the upper word.  success is
   derived from the condition register and the loop repeats until it
   reads 1. */
| 498 | do { |
| 499 | __asm__ __volatile__( |
| 500 | "ldarx 15,0,%1" "\n\t" |
| 501 | "add 15,15,%2" "\n\t" |
| 502 | "stdcx. 15,0,%1" "\n\t" |
| 503 | "mfcr %0" "\n\t" |
| 504 | "srwi %0,%0,29" "\n\t" |
| 505 | "andi. %0,%0,1" "\n" |
| 506 | : /*out*/"=b"(success) |
| 507 | : /*in*/ "b"(p), "b"(((unsigned long)n) << 32) |
| 508 | : /*trash*/ "memory", "cc", "r15" |
| 509 | ); |
| 510 | } while (success != 1); |
carll | dd690bf | 2014-08-07 23:49:27 +0000 | [diff] [blame] | 511 | #elif defined(VGA_ppc64le) |
| 512 | /* Nasty hack. Does correctly atomically do *p += n, but only if p |
| 513 | is 8-aligned -- guaranteed by caller. */ |
| 514 | unsigned long success; |
/* Same retry loop, but n is added unshifted to the whole doubleword.
   NOTE(review): a carry out of the low word would then reach the four
   bytes that follow it -- presumably acceptable because main only
   inspects the word at p; confirm. */
| 515 | do { |
| 516 | __asm__ __volatile__( |
| 517 | "ldarx 15,0,%1" "\n\t" |
| 518 | "add 15,15,%2" "\n\t" |
| 519 | "stdcx. 15,0,%1" "\n\t" |
| 520 | "mfcr %0" "\n\t" |
| 521 | "srwi %0,%0,29" "\n\t" |
| 522 | "andi. %0,%0,1" "\n" |
| 523 | : /*out*/"=b"(success) |
| 524 | : /*in*/ "b"(p), "b"(((unsigned long)n)) |
| 525 | : /*trash*/ "memory", "cc", "r15" |
| 526 | ); |
| 527 | } while (success != 1); |
sewardj | 59570ff | 2010-01-01 11:59:33 +0000 | [diff] [blame] | 528 | #elif defined(VGA_arm) |
/* block[] = { p, n, status }.  strex writes its status into r4, which
   is stored to block[2]; the loop retries until that status is 0. */
sewardj | 1956c50 | 2010-09-24 23:51:17 +0000 | [diff] [blame] | 529 | unsigned int block[3] |
| 530 | = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF }; |
| 531 | do { |
| 532 | __asm__ __volatile__( |
| 533 | "mov r5, %0" "\n\t" |
| 534 | "ldr r9, [r5, #0]" "\n\t" // p |
| 535 | "ldr r10, [r5, #4]" "\n\t" // n |
| 536 | "ldrex r8, [r9]" "\n\t" |
| 537 | "add r8, r8, r10" "\n\t" |
sewardj | 9bd30a8 | 2011-07-11 11:46:52 +0000 | [diff] [blame] | 538 | "strex r4, r8, [r9]" "\n\t" |
| 539 | "str r4, [r5, #8]" "\n\t" |
sewardj | 1956c50 | 2010-09-24 23:51:17 +0000 | [diff] [blame] | 540 | : /*out*/ |
| 541 | : /*in*/ "r"(&block[0]) |
sewardj | 9bd30a8 | 2011-07-11 11:46:52 +0000 | [diff] [blame] | 542 | : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4" |
sewardj | 1956c50 | 2010-09-24 23:51:17 +0000 | [diff] [blame] | 543 | ); |
| 544 | } while (block[2] != 0); |
sewardj | 14857fd | 2014-02-20 17:34:45 +0000 | [diff] [blame] | 545 | #elif defined(VGA_arm64) |
/* block[] = { p, n, status }.  stxr writes its status into w4; x4 is
   stored to block[2] and the loop retries until that slot is 0. */
| 546 | unsigned long long int block[3] |
| 547 | = { (unsigned long long int)p, (unsigned long long int)n, |
| 548 | 0xFFFFFFFFFFFFFFFFULL}; |
| 549 | do { |
| 550 | __asm__ __volatile__( |
| 551 | "mov x5, %0" "\n\t" |
| 552 | "ldr x9, [x5, #0]" "\n\t" // p |
| 553 | "ldr x10, [x5, #8]" "\n\t" // n |
| 554 | "ldxr w8, [x9]" "\n\t" |
| 555 | "add x8, x8, x10" "\n\t" |
| 556 | "stxr w4, w8, [x9]" "\n\t" |
| 557 | "str x4, [x5, #16]" "\n\t" |
| 558 | : /*out*/ |
| 559 | : /*in*/ "r"(&block[0]) |
| 560 | : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4" |
| 561 | ); |
| 562 | } while (block[2] != 0); |
sewardj | b5b8740 | 2011-03-07 16:05:35 +0000 | [diff] [blame] | 563 | #elif defined(VGA_s390x) |
/* Compare-and-swap loop: the old word is held in register 0, the sum is
   formed in register 1, cs tries to install it at p and jl 0b loops
   back to retry. */
| 564 | __asm__ __volatile__( |
| 565 | " l 0,%0\n\t" |
| 566 | "0: lr 1,0\n\t" |
| 567 | " ar 1,%1\n\t" |
| 568 | " cs 0,1,%0\n\t" |
| 569 | " jl 0b\n\t" |
| 570 | : "+m" (*p) |
| 571 | : "d" (n) |
| 572 | : "cc", "memory", "0", "1"); |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 573 | #elif defined(VGA_mips32) |
/* block[] = { p, n, result }.  ll/addu/sc on the word at p; the value
   sc leaves in $t3 is stored to block[2] and the loop retries until it
   is 1. */
| 574 | unsigned int block[3] |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 575 | = { (unsigned int)p, (unsigned int)n, 0x0 }; |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 576 | do { |
| 577 | __asm__ __volatile__( |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 578 | "move $t0, %0" "\n\t" |
| 579 | "lw $t1, 0($t0)" "\n\t" // p |
| 580 | "lw $t2, 4($t0)" "\n\t" // n |
| 581 | "ll $t3, 0($t1)" "\n\t" |
| 582 | "addu $t3, $t3, $t2" "\n\t" |
| 583 | "sc $t3, 0($t1)" "\n\t" |
| 584 | "sw $t3, 8($t0)" "\n\t" |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 585 | : /*out*/ |
| 586 | : /*in*/ "r"(&block[0]) |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 587 | : /*trash*/ "memory", "t0", "t1", "t2", "t3" |
| 588 | ); |
| 589 | } while (block[2] != 1); |
| 590 | #elif defined(VGA_mips64) |
/* Same ll/addu/sc sequence as the mips32 variant, with 64-bit block[]
   slots: p and n are loaded with ld and the sc result is stored with
   sd; the loop retries until block[2] is 1. */
| 591 | unsigned long block[3] |
| 592 | = { (unsigned long)p, (unsigned long)n, 0x0ULL }; |
| 593 | do { |
| 594 | __asm__ __volatile__( |
| 595 | "move $t0, %0" "\n\t" |
| 596 | "ld $t1, 0($t0)" "\n\t" // p |
| 597 | "ld $t2, 8($t0)" "\n\t" // n |
| 598 | "ll $t3, 0($t1)" "\n\t" |
| 599 | "addu $t3, $t3, $t2" "\n\t" |
| 600 | "sc $t3, 0($t1)" "\n\t" |
| 601 | "sd $t3, 16($t0)" "\n\t" |
| 602 | : /*out*/ |
| 603 | : /*in*/ "r"(&block[0]) |
| 604 | : /*trash*/ "memory", "t0", "t1", "t2", "t3" |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 605 | ); |
| 606 | } while (block[2] != 1); |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 607 | #else |
| 608 | # error "Unsupported arch" |
| 609 | #endif |
| 610 | } |
| 611 | |
/* atomic_add_64bit: atomically perform *p += n on the 64-bit value at p.
   The implementation is hand-written inline assembly selected at compile
   time by the VGA_<arch> macro; an unrecognised architecture is a build
   error.  On x86, ppc32 and mips32 the function body is empty, so *p is
   left untouched (main accepts a final value of 0 for that reason).
   Nothing is returned; the only intended effect is the update of memory
   at p. */
| 612 | __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n ) |
| 613 | { |
sewardj | 5db1540 | 2012-06-07 09:13:21 +0000 | [diff] [blame] | 614 | #if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32) |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 615 | /* do nothing; is not supported */ |
| 616 | #elif defined(VGA_amd64) |
/* amd64: p and n are handed over in block[], whose address is pinned to
   rsi; a lock-prefixed addq then updates the quadword in one
   instruction. */
| 617 | // this is a bit subtle. It relies on the fact that, on a 64-bit platform, |
| 618 | // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*) |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 619 | unsigned long long int block[2]; |
| 620 | block[0] = (unsigned long long int)(unsigned long)p; |
| 621 | block[1] = n; |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 622 | __asm__ __volatile__( |
| 623 | "movq 0(%%rsi),%%rax" "\n\t" |
| 624 | "movq 8(%%rsi),%%rbx" "\n\t" |
| 625 | "lock; addq %%rbx,(%%rax)" "\n" |
| 626 | : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx" |
| 627 | ); |
carll | dd690bf | 2014-08-07 23:49:27 +0000 | [diff] [blame] | 628 | #elif defined(VGA_ppc64be) || defined(VGA_ppc64le) |
/* Retry loop around ldarx/stdcx. on the doubleword at p, with n added
   as is.  mfcr, srwi and andi. reduce the condition register to a
   single bit in success, and the loop repeats until that bit reads 1. */
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 629 | unsigned long success; |
| 630 | do { |
| 631 | __asm__ __volatile__( |
| 632 | "ldarx 15,0,%1" "\n\t" |
| 633 | "add 15,15,%2" "\n\t" |
| 634 | "stdcx. 15,0,%1" "\n\t" |
| 635 | "mfcr %0" "\n\t" |
| 636 | "srwi %0,%0,29" "\n\t" |
| 637 | "andi. %0,%0,1" "\n" |
| 638 | : /*out*/"=b"(success) |
| 639 | : /*in*/ "b"(p), "b"(n) |
| 640 | : /*trash*/ "memory", "cc", "r15" |
| 641 | ); |
| 642 | } while (success != 1); |
sewardj | 9bd30a8 | 2011-07-11 11:46:52 +0000 | [diff] [blame] | 643 | #elif defined(VGA_arm) |
/* block[] = { p, n, status } with 64-bit slots.  n is loaded as a
   register pair (ldrd) and added to the pair read by ldrexd using
   adds/adc.  The strexd status in r1 is written with a 32-bit str into
   block[2], which starts as all ones, so only half of that slot
   changes; the loop ends when the slot reads 0xFFFFFFFF00000000, that
   is when the stored status is 0. */
| 644 | unsigned long long int block[3] |
| 645 | = { (unsigned long long int)(unsigned long)p, |
| 646 | (unsigned long long int)n, |
| 647 | 0xFFFFFFFFFFFFFFFFULL }; |
| 648 | do { |
| 649 | __asm__ __volatile__( |
| 650 | "mov r5, %0" "\n\t" |
| 651 | "ldr r8, [r5, #0]" "\n\t" // p |
| 652 | "ldrd r2, r3, [r5, #8]" "\n\t" // n |
| 653 | "ldrexd r0, r1, [r8]" "\n\t" |
| 654 | "adds r2, r2, r0" "\n\t" |
| 655 | "adc r3, r3, r1" "\n\t" |
| 656 | "strexd r1, r2, r3, [r8]" "\n\t" |
| 657 | "str r1, [r5, #16]" "\n\t" |
| 658 | : /*out*/ |
| 659 | : /*in*/ "r"(&block[0]) |
| 660 | : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3" |
| 661 | ); |
| 662 | } while (block[2] != 0xFFFFFFFF00000000ULL); |
sewardj | 14857fd | 2014-02-20 17:34:45 +0000 | [diff] [blame] | 663 | #elif defined(VGA_arm64) |
/* block[] = { p, n, status }.  stxr writes its status into w4; x4 is
   stored to block[2] and the loop retries until that slot is 0. */
| 664 | unsigned long long int block[3] |
| 665 | = { (unsigned long long int)p, (unsigned long long int)n, |
| 666 | 0xFFFFFFFFFFFFFFFFULL}; |
| 667 | do { |
| 668 | __asm__ __volatile__( |
| 669 | "mov x5, %0" "\n\t" |
| 670 | "ldr x9, [x5, #0]" "\n\t" // p |
| 671 | "ldr x10, [x5, #8]" "\n\t" // n |
| 672 | "ldxr x8, [x9]" "\n\t" |
| 673 | "add x8, x8, x10" "\n\t" |
| 674 | "stxr w4, x8, [x9]" "\n\t" |
| 675 | "str x4, [x5, #16]" "\n\t" |
| 676 | : /*out*/ |
| 677 | : /*in*/ "r"(&block[0]) |
| 678 | : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4" |
| 679 | ); |
| 680 | } while (block[2] != 0); |
sewardj | b5b8740 | 2011-03-07 16:05:35 +0000 | [diff] [blame] | 681 | #elif defined(VGA_s390x) |
/* Compare-and-swap loop on 64-bit values: the old doubleword is held in
   register 0, the sum is formed in register 1, csg tries to install it
   at p and jl 0b loops back to retry. */
| 682 | __asm__ __volatile__( |
| 683 | " lg 0,%0\n\t" |
| 684 | "0: lgr 1,0\n\t" |
| 685 | " agr 1,%1\n\t" |
| 686 | " csg 0,1,%0\n\t" |
| 687 | " jl 0b\n\t" |
| 688 | : "+m" (*p) |
| 689 | : "d" (n) |
| 690 | : "cc", "memory", "0", "1"); |
petarj | dd6bf60 | 2013-04-03 22:51:23 +0000 | [diff] [blame] | 691 | #elif defined(VGA_mips64) |
/* block[] = { p, n, result }.  lld/daddu/scd on the doubleword at p;
   the value scd leaves in $t3 is stored to block[2] and the loop
   retries until it is 1. */
| 692 | unsigned long block[3] |
| 693 | = { (unsigned long)p, (unsigned long)n, 0x0ULL }; |
| 694 | do { |
| 695 | __asm__ __volatile__( |
| 696 | "move $t0, %0" "\n\t" |
| 697 | "ld $t1, 0($t0)" "\n\t" // p |
| 698 | "ld $t2, 8($t0)" "\n\t" // n |
| 699 | "lld $t3, 0($t1)" "\n\t" |
| 700 | "daddu $t3, $t3, $t2" "\n\t" |
| 701 | "scd $t3, 0($t1)" "\n\t" |
| 702 | "sd $t3, 16($t0)" "\n\t" |
| 703 | : /*out*/ |
| 704 | : /*in*/ "r"(&block[0]) |
| 705 | : /*trash*/ "memory", "t0", "t1", "t2", "t3" |
| 706 | ); |
| 707 | } while (block[2] != 1); |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 708 | #else |
| 709 | # error "Unsupported arch" |
| 710 | #endif |
| 711 | } |
| 712 | |
| 713 | int main ( int argc, char** argv ) |
| 714 | { |
| 715 | int i, status; |
| 716 | char* page; |
| 717 | char* p8; |
| 718 | short* p16; |
| 719 | int* p32; |
| 720 | long long int* p64; |
| 721 | pid_t child, p2; |
| 722 | |
| 723 | printf("parent, pre-fork\n"); |
| 724 | |
| 725 | page = mmap( 0, sysconf(_SC_PAGESIZE), |
| 726 | PROT_READ|PROT_WRITE, |
| 727 | MAP_ANONYMOUS|MAP_SHARED, -1, 0 ); |
| 728 | if (page == MAP_FAILED) { |
| 729 | perror("mmap failed"); |
| 730 | exit(1); |
| 731 | } |
| 732 | |
| 733 | p8 = (char*)(page+0); |
| 734 | p16 = (short*)(page+256); |
| 735 | p32 = (int*)(page+512); |
| 736 | p64 = (long long int*)(page+768); |
| 737 | |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 738 | assert( IS_8_ALIGNED(p8) ); |
| 739 | assert( IS_8_ALIGNED(p16) ); |
| 740 | assert( IS_8_ALIGNED(p32) ); |
| 741 | assert( IS_8_ALIGNED(p64) ); |
| 742 | |
| 743 | memset(page, 0, 1024); |
| 744 | |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 745 | *p8 = 0; |
| 746 | *p16 = 0; |
| 747 | *p32 = 0; |
| 748 | *p64 = 0; |
| 749 | |
| 750 | child = fork(); |
| 751 | if (child == -1) { |
| 752 | perror("fork() failed\n"); |
| 753 | return 1; |
| 754 | } |
| 755 | |
| 756 | if (child == 0) { |
| 757 | /* --- CHILD --- */ |
| 758 | printf("child\n"); |
| 759 | for (i = 0; i < NNN; i++) { |
| 760 | atomic_add_8bit(p8, 1); |
| 761 | atomic_add_16bit(p16, 1); |
| 762 | atomic_add_32bit(p32, 1); |
| 763 | atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ |
| 764 | } |
| 765 | return 1; |
| 766 | /* NOTREACHED */ |
| 767 | |
| 768 | } |
| 769 | |
| 770 | /* --- PARENT --- */ |
| 771 | |
| 772 | printf("parent\n"); |
| 773 | |
| 774 | for (i = 0; i < NNN; i++) { |
| 775 | atomic_add_8bit(p8, 1); |
| 776 | atomic_add_16bit(p16, 1); |
| 777 | atomic_add_32bit(p32, 1); |
| 778 | atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ |
| 779 | } |
| 780 | |
| 781 | p2 = waitpid(child, &status, 0); |
| 782 | assert(p2 == child); |
| 783 | |
| 784 | /* assert that child finished normally */ |
| 785 | assert(WIFEXITED(status)); |
| 786 | |
| 787 | printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n", |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 788 | (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 ); |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 789 | |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 790 | if (-74 == (int)(*(signed char*)p8) |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 791 | && 32694 == (int)(*p16) |
| 792 | && 6913974 == *p32 |
sewardj | 571dfb0 | 2009-07-04 14:33:53 +0000 | [diff] [blame] | 793 | && (0LL == *p64 || 682858642110LL == *p64)) { |
sewardj | ac258d8 | 2009-07-04 12:44:08 +0000 | [diff] [blame] | 794 | printf("PASS\n"); |
| 795 | } else { |
| 796 | printf("FAIL -- see source code for expected values\n"); |
| 797 | } |
| 798 | |
| 799 | printf("parent exits\n"); |
| 800 | |
| 801 | return 0; |
| 802 | } |