Atomics: make use of the "cmpxchg weak" instruction.
This also simplifies the IR we create slightly: instead of working out
where success & failure should go manually, it turns out we can just
always jump to a success/failure block created for the purpose. Later
phases will sort out the mess without much difficulty.
llvm-svn: 210917
diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll
index f18cf22..462c185 100644
--- a/llvm/test/CodeGen/ARM/atomic-64bit.ll
+++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -171,9 +171,10 @@
define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-LABEL: test7:
-; CHECK: dmb {{ish$}}
+; CHECK-DAG: mov [[VAL1LO:r[0-9]+]], r1
+; CHECK-DAG: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-LE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], r1
+; CHECK-LE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], [[VAL1LO]]
; CHECK-LE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
; CHECK-BE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG2]], r2
; CHECK-BE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG1]], r1
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
new file mode 100644
index 0000000..126e330
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+
+define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: test_cmpxchg_weak:
+; Weak cmpxchg, seq_cst on success and monotonic on failure: the checks expect one ldrex/strex attempt whose failure branches past the second dmb.
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+ %oldval = extractvalue { i32, i1 } %pair, 0
+; CHECK: dmb ish
+; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
+; CHECK: cmp [[LOADED]], r1
+; CHECK: strexeq [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK: cmpeq [[SUCCESS]], #0
+; CHECK: bne [[DONE:LBB[0-9]+_[0-9]+]]
+; CHECK: dmb ish
+; CHECK: [[DONE]]:
+; CHECK: str [[LOADED]], [r0]
+; CHECK: bx lr
+; %oldval is field 0 of the pair (the value read by ldrex), so the final str is matched against the ldrex destination register.
+ store i32 %oldval, i32* %addr
+ ret void
+}
+
+
+define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: test_cmpxchg_weak_to_bool:
+; Returns only the i1 success flag of a weak cmpxchg. The checks below expect %addr/%desired/%new in r1/r2/r3 (after the unnamed leading i32).
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+ %success = extractvalue { i32, i1 } %pair, 1
+; Expected shape: r0 is preset to 0 and set to 1 only after the trailing barrier; a failed compare or strex branches past both.
+; CHECK: dmb ish
+; CHECK: mov r0, #0
+; CHECK: ldrex [[LOADED:r[0-9]+]], [r1]
+; CHECK: cmp [[LOADED]], r2
+; CHECK: strexeq [[STATUS:r[0-9]+]], r3, [r1]
+; CHECK: cmpeq [[STATUS]], #0
+; CHECK: bne [[DONE:LBB[0-9]+_[0-9]+]]
+; CHECK: dmb ish
+; CHECK: mov r0, #1
+; CHECK: [[DONE]]:
+; CHECK: bx lr
+
+ ret i1 %success
+}
diff --git a/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll b/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
index 3a549cd..f2f5b92 100644
--- a/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
+++ b/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
@@ -229,20 +229,24 @@
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[BARRIER:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[TRY_STORE]] ], [ false, %[[LOOP]] ]
+; CHECK: [[SUCCESS_BB]]:
; CHECK: fence seq_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i8 [[OLDVAL]]
%pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
@@ -259,20 +263,24 @@
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[BARRIER:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
; CHECK: fence seq_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[BARRIER]] ], [ false, %[[LOOP]] ]
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i16 [[OLDVAL]]
%pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
@@ -288,19 +296,23 @@
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[BARRIER:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[TRY_STORE]] ], [ false, %[[LOOP]] ]
+; CHECK: [[SUCCESS_BB]]:
; CHECK: fence acquire
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK: fence acquire
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[OLDVAL]]
%pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
@@ -323,7 +335,7 @@
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -332,14 +344,18 @@
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[BARRIER:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[BARRIER]] ], [ false, %[[LOOP]] ]
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i64 [[OLDVAL]]
%pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
diff --git a/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll b/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
index d6781e1..8092c10 100644
--- a/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
+++ b/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
@@ -91,20 +91,24 @@
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[BARRIER:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[TRY_STORE]] ], [ false, %[[LOOP]] ]
-; CHECK-NOT: fence
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i8 [[OLDVAL]]
%pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
@@ -121,20 +125,24 @@
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[BARRIER:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[BARRIER]] ], [ false, %[[LOOP]] ]
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i16 [[OLDVAL]]
%pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
@@ -150,19 +158,23 @@
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[BARRIER:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[TRY_STORE]] ], [ false, %[[LOOP]] ]
-; CHECK-NOT: fence
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[OLDVAL]]
%pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
@@ -185,7 +197,7 @@
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -194,13 +206,18 @@
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[BARRIER:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
-; CHECK-NOT: fence
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i64 [[OLDVAL]]
%pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
diff --git a/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll b/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
new file mode 100644
index 0000000..07a4a7f
--- /dev/null
+++ b/llvm/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
@@ -0,0 +1,97 @@
+; RUN: opt -atomic-ll-sc -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
+
+define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_seq_cst
+; CHECK: fence release
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
+; Weak form: a failed strex goes to the same failure block as a failed compare rather than back to the load.
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[LOADED]]
+; Input under test: weak cmpxchg with seq_cst on both success and failure, so a fence is expected in both exit blocks above.
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+ %oldval = extractvalue { i32, i1 } %pair, 0
+ ret i32 %oldval
+}
+
+define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_weak_fail
+; CHECK: fence release
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
+; The branch above reuses the FAILURE_BB capture, so a failed strex must target the same block as a failed compare.
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i1 [[SUCCESS]]
+; Input under test: weak cmpxchg, seq_cst on success and monotonic on failure, so only the success block is expected to carry a fence.
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+ %oldval = extractvalue { i32, i1 } %pair, 1 ; field 1 is the i1 success flag, despite the name
+ ret i1 %oldval
+}
+
+define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_monotonic
+; CHECK-NOT: fence
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
+; The branch above reuses the FAILURE_BB capture, so a failed strex must target the same block as a failed compare.
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[LOADED]]
+; Input under test: weak cmpxchg with monotonic ordering on both paths, so no fence is expected anywhere in the expansion.
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic
+ %oldval = extractvalue { i32, i1 } %pair, 0
+ ret i32 %oldval
+}