; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -march=x86 -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X32

; On x86, an atomic read-modify-write operation that does not modify the
; value in memory (such as an atomic add of 0) can be replaced by an mfence
; followed by a plain mov. This optimization, and the motivation for it, are
; explained in:
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
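;
; As an illustrative sketch (not checked verbatim by this test; the exact
; registers depend on the calling convention), fetching the old value via
;   %1 = atomicrmw add i32* %p, i32 0 seq_cst
; would normally be lowered to a "lock xaddl", but with this optimization it
; can instead be emitted as
;   mfence
;   movl (%rdi), %eax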

define i8 @add8(i8* %p) {
; CHECK-LABEL: add8
; CHECK: mfence
; CHECK: movb
  %1 = atomicrmw add i8* %p, i8 0 monotonic
  ret i8 %1
}

define i16 @or16(i16* %p) {
; CHECK-LABEL: or16
; CHECK: mfence
; CHECK: movw
  %1 = atomicrmw or i16* %p, i16 0 acquire
  ret i16 %1
}

define i32 @xor32(i32* %p) {
; CHECK-LABEL: xor32
; CHECK: mfence
; CHECK: movl
  %1 = atomicrmw xor i32* %p, i32 0 release
  ret i32 %1
}

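; On 32-bit x86, i64 is wider than a native register, so the i64 operation
; below is expanded to a cmpxchg8b loop rather than the mfence+mov sequence;
; only the 64-bit target is expected to emit the mfence here.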
define i64 @sub64(i64* %p) {
; X64: mfence
; X64: movq
; X32-NOT: mfence
  %1 = atomicrmw sub i64* %p, i64 0 seq_cst
  ret i64 %1
}

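; i128 is wider than a native register even on x86-64, so neither target is
; expected to apply the optimization here; the operation is instead lowered
; to a cmpxchg loop (or a library call).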
define i128 @or128(i128* %p) {
; CHECK-LABEL: or128
; CHECK-NOT: mfence
  %1 = atomicrmw or i128* %p, i128 0 monotonic
  ret i128 %1
}

; For 'and', the idempotent operand is -1 (all bits set), since x & -1 == x.
define i32 @and32(i32* %p) {
; CHECK-LABEL: and32
; CHECK: mfence
; CHECK: movl
  %1 = atomicrmw and i32* %p, i32 -1 acq_rel
  ret i32 %1
}