[ARM] Emit clrex in the expanded cmpxchg fail block.
ARM counterpart to r248291:
In the comparison failure block of a cmpxchg expansion, the initial
ldrex/ldxr will not be followed by a matching strex/stxr.
On ARM/AArch64, this unnecessarily ties up the exclusive monitor,
which might have a negative performance impact on some microarchitectures.
Instead, release the monitor in the failure block.
The clrex instruction was designed for this: use it.
Also see the ARM Architecture Reference Manual (ARMv8-A), section B2.10.2:
"Exclusive access instructions and Shareable memory locations".
Differential Revision: http://reviews.llvm.org/D13033
llvm-svn: 248294
diff --git a/llvm/test/CodeGen/ARM/atomic-cmp.ll b/llvm/test/CodeGen/ARM/atomic-cmp.ll
index 629b16d..7f41b7d 100644
--- a/llvm/test/CodeGen/ARM/atomic-cmp.ll
+++ b/llvm/test/CodeGen/ARM/atomic-cmp.ll
@@ -6,10 +6,12 @@
; ARM-LABEL: t:
; ARM: ldrexb
; ARM: strexb
+; ARM: clrex
; T2-LABEL: t:
-; T2: ldrexb
; T2: strexb
+; T2: ldrexb
+; T2: clrex
%tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic
%tmp1 = extractvalue { i8, i1 } %tmp0, 0
ret i8 %tmp1
diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
index 84790be..10db26c 100644
--- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARM
-; RUN: llc < %s -mtriple=thumb-linux-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMB
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=thumb-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMB
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARMV7
-; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV7
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARMV7
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV7
define zeroext i1 @test_cmpxchg_res_i8(i8* %addr, i8 %desired, i8 zeroext %new) {
entry:
@@ -30,24 +30,39 @@
; CHECK-THUMB: push {[[R2]]}
; CHECK-THUMB: pop {r0}
-; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8
-; CHECK-ARMV7: ldrexb [[R3:r[0-9]+]], [r0]
-; CHECK-ARMV7: mov [[R1:r[0-9]+]], #0
-; CHECK-ARMV7: cmp [[R3]], {{r[0-9]+}}
-; CHECK-ARMV7: bne
-; CHECK-ARMV7: strexb [[R3]], {{r[0-9]+}}, [{{r[0-9]+}}]
-; CHECK-ARMV7: mov [[R1]], #1
-; CHECK-ARMV7: cmp [[R3]], #0
-; CHECK-ARMV7: bne
-; CHECK-ARMV7: mov r0, [[R1]]
+; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
+; CHECK-ARMV7-NEXT: .fnstart
+; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
+; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
+; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-ARMV7-NEXT: bne [[FAIL:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK-ARMV7-NEXT: mov [[RES:r[0-9]+]], #1
+; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
+; CHECK-ARMV7-NEXT: bne [[TRY]]
+; CHECK-ARMV7-NEXT: b [[END:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: [[FAIL]]:
+; CHECK-ARMV7-NEXT: clrex
+; CHECK-ARMV7-NEXT: mov [[RES]], #0
+; CHECK-ARMV7-NEXT: [[END]]:
+; CHECK-ARMV7-NEXT: mov r0, [[RES]]
+; CHECK-ARMV7-NEXT: bx lr
-; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8
-; CHECK-THUMBV7: ldrexb [[R3:r[0-9]+]], [r0]
-; CHECK-THUMBV7: cmp [[R3]], {{r[0-9]+}}
-; CHECK-THUMBV7: movne r0, #0
-; CHECK-THUMBV7: bxne lr
-; CHECK-THUMBV7: strexb [[R3]], {{r[0-9]+}}, [{{r[0-9]+}}]
-; CHECK-THUMBV7: cmp [[R3]], #0
-; CHECK-THUMBV7: itt eq
-; CHECK-THUMBV7: moveq r0, #1
-; CHECK-THUMBV7: bxeq lr
+; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
+; CHECK-THUMBV7-NEXT: .fnstart
+; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
+; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]]
+; CHECK-THUMBV7-NEXT: [[TRYST:.LBB[0-9_]+]]:
+; CHECK-THUMBV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK-THUMBV7-NEXT: cmp [[SUCCESS]], #0
+; CHECK-THUMBV7-NEXT: itt eq
+; CHECK-THUMBV7-NEXT: moveq r0, #1
+; CHECK-THUMBV7-NEXT: bxeq lr
+; CHECK-THUMBV7-NEXT: [[TRYLD]]:
+; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-THUMBV7-NEXT: beq [[TRYST]]
+; CHECK-THUMBV7-NEXT: clrex
+; CHECK-THUMBV7-NEXT: movs r0, #0
+; CHECK-THUMBV7-NEXT: bx lr
diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll
index db32bff..7913894 100644
--- a/llvm/test/CodeGen/ARM/atomic-op.ll
+++ b/llvm/test/CodeGen/ARM/atomic-op.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARMV7
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2
; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1
; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-M0
; RUN: llc < %s -mtriple=thumbv7--none-eabi -thread-model single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL
@@ -272,16 +272,31 @@
%pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%oldval = extractvalue { i32, i1 } %pair, 0
-; CHECK: dmb ish
-; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
-; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
-; CHECK: cmp [[OLDVAL]], r1
-; CHECK: bxne lr
-; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
-; CHECK: cmp [[SUCCESS]], #0
-; CHECK: bne [[LOOP_BB]]
-; CHECK: dmb ish
-; CHECK: bx lr
+; CHECK-ARMV7: dmb ish
+; CHECK-ARMV7: [[LOOP_BB:\.?LBB[0-9]+_1]]:
+; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
+; CHECK-ARMV7: cmp [[OLDVAL]], r1
+; CHECK-ARMV7: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK-ARMV7: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
+; CHECK-ARMV7: cmp [[SUCCESS]], #0
+; CHECK-ARMV7: bne [[LOOP_BB]]
+; CHECK-ARMV7: dmb ish
+; CHECK-ARMV7: bx lr
+; CHECK-ARMV7: [[FAIL_BB]]:
+; CHECK-ARMV7: clrex
+; CHECK-ARMV7: bx lr
+
+; CHECK-T2: dmb ish
+; CHECK-T2: [[LOOP_BB:\.?LBB[0-9]+_1]]:
+; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
+; CHECK-T2: cmp [[OLDVAL]], r1
+; CHECK-T2: clrexne
+; CHECK-T2: bxne lr
+; CHECK-T2: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
+; CHECK-T2: cmp [[SUCCESS]], #0
+; CHECK-T2: dmbeq ish
+; CHECK-T2: bxeq lr
+; CHECK-T2: b [[LOOP_BB]]
ret i32 %oldval
}
@@ -295,11 +310,14 @@
; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK: cmp [[OLDVAL]], r1
-; CHECK: bne [[END_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK: cmp [[SUCCESS]], #0
; CHECK: bne [[LOOP_BB]]
-; CHECK: [[END_BB]]:
+; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK: [[FAIL_BB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[END_BB]]:
; CHECK: dmb ish
; CHECK: bx lr
diff --git a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
index 86287c1..efdb75b 100644
--- a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1055,24 +1055,30 @@
%old = extractvalue { i8, i1 } %pair, 0
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-DAG: movt r[[ADDR]], :upper16:var8
+; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0
; CHECK: .LBB{{[0-9]+}}_1:
; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-ARM-NEXT: cmp r[[OLD]], r0
+; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
-; CHECK: strexb [[STATUS:r[0-9]+]], r1, {{.*}}[[ADDR]]
+; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NEXT: b .LBB{{[0-9]+}}_4
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD]]
+; CHECK-ARM: mov r0, r[[OLD]]
ret i8 %old
}
@@ -1082,24 +1088,30 @@
%old = extractvalue { i16, i1 } %pair, 0
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK-DAG: movt r[[ADDR]], :upper16:var16
+; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0
; CHECK: .LBB{{[0-9]+}}_1:
; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-ARM-NEXT: cmp r[[OLD]], r0
+; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NEXT: b .LBB{{[0-9]+}}_4
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD]]
+; CHECK-ARM: mov r0, r[[OLD]]
ret i16 %old
}
@@ -1124,6 +1136,10 @@
; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NEXT: b .LBB{{[0-9]+}}_4
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
; CHECK-NOT: dmb
; CHECK-NOT: mcr
@@ -1158,6 +1174,10 @@
; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NEXT: b .LBB{{[0-9]+}}_4
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
; CHECK-NOT: dmb
; CHECK-NOT: mcr
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
index 126e330..1eac9c4 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
@@ -5,16 +5,24 @@
%pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%oldval = extractvalue { i32, i1 } %pair, 0
-; CHECK: dmb ish
-; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
-; CHECK: cmp [[LOADED]], r1
-; CHECK: strexeq [[SUCCESS:r[0-9]+]], r2, [r0]
-; CHECK: cmpeq [[SUCCESS]], #0
-; CHECK: bne [[DONE:LBB[0-9]+_[0-9]+]]
-; CHECK: dmb ish
-; CHECK: [[DONE]]:
-; CHECK: str r3, [r0]
-; CHECK: bx lr
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r0]
+; CHECK-NEXT: cmp [[LOADED]], r1
+; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#1:
+; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK-NEXT: cmp [[SUCCESS]], #0
+; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: str r3, [r0]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: [[LDFAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: str r3, [r0]
+; CHECK-NEXT: bx lr
store i32 %oldval, i32* %addr
ret void
@@ -27,17 +35,23 @@
%pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%success = extractvalue { i32, i1 } %pair, 1
-; CHECK: dmb ish
-; CHECK: mov r0, #0
-; CHECK: ldrex [[LOADED:r[0-9]+]], [r1]
-; CHECK: cmp [[LOADED]], r2
-; CHECK: strexeq [[STATUS:r[0-9]+]], r3, [r1]
-; CHECK: cmpeq [[STATUS]], #0
-; CHECK: bne [[DONE:LBB[0-9]+_[0-9]+]]
-; CHECK: dmb ish
-; CHECK: mov r0, #1
-; CHECK: [[DONE]]:
-; CHECK: bx lr
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r1]
+; CHECK-NEXT: cmp [[LOADED]], r2
+; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#1:
+; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1]
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: cmp [[SUCCESS]], #0
+; CHECK-NEXT: bxne lr
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: mov r0, #1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: [[LDFAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: bx lr
ret i1 %success
}