ARM: sink atomic release barrier as far as possible into cmpxchg.
DMB instructions can be expensive, so it's best to avoid them if possible. In
atomicrmw operations there will always be an attempted store so a release
barrier is always needed, but in the cmpxchg case we can delay the DMB until we
know we'll definitely try to perform a store (and so need release semantics).
In the strong cmpxchg case this isn't quite free: we must duplicate the LDREX
instructions to skip the barrier on subsequent iterations. The basic outline
becomes:
ldrex rOld, [rAddr]
cmp rOld, rDesired
bne Ldone
dmb
Lloop:
strex rRes, rNew, [rAddr]
cbz rRes Ldone
ldrex rOld, [rAddr]
cmp rOld, rDesired
beq Lloop
Ldone:
So we'll skip this version for strong operations in "minsize" functions.
llvm-svn: 261568
diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll
index 573cd45..a188a95 100644
--- a/llvm/test/CodeGen/ARM/atomic-64bit.ll
+++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -172,31 +172,31 @@
define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-LABEL: test7:
; CHECK-DAG: mov [[VAL1LO:r[0-9]+]], r1
-; CHECK-DAG: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-LE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], [[VAL1LO]]
-; CHECK-LE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
-; CHECK-BE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG2]], r2
-; CHECK-BE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG1]], r1
+; CHECK-LE-DAG: eor [[MISMATCH_LO:.*]], [[REG1]], [[VAL1LO]]
+; CHECK-LE-DAG: eor [[MISMATCH_HI:.*]], [[REG2]], r2
+; CHECK-BE-DAG: eor [[MISMATCH_LO:.*]], [[REG2]], r2
+; CHECK-BE-DAG: eor [[MISMATCH_HI:.*]], [[REG1]], r1
; CHECK: orrs {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK: bne
+; CHECK-DAG: dmb {{ish$}}
; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
-; CHECK: bne
+; CHECK: beq
; CHECK: dmb {{ish$}}
; CHECK-THUMB-LABEL: test7:
-; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]], r2
; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]], r3
-; CHECK-THUMB-LE: orrs [[MISMATCH_HI]], [[MISMATCH_LO]]
+; CHECK-THUMB-LE: orrs.w {{.*}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK-THUMB: bne
+; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
; CHECK-THUMB: cmp
-; CHECK-THUMB: bne
+; CHECK-THUMB: beq
; CHECK-THUMB: dmb {{ish$}}
%pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll
index 7913894..f7ef492 100644
--- a/llvm/test/CodeGen/ARM/atomic-op.ll
+++ b/llvm/test/CodeGen/ARM/atomic-op.ll
@@ -272,31 +272,37 @@
%pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%oldval = extractvalue { i32, i1 } %pair, 0
-; CHECK-ARMV7: dmb ish
-; CHECK-ARMV7: [[LOOP_BB:\.?LBB[0-9]+_1]]:
; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK-ARMV7: cmp [[OLDVAL]], r1
; CHECK-ARMV7: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK-ARMV7: dmb ish
+; CHECK-ARMV7: [[LOOP_BB:\.?LBB.*]]:
; CHECK-ARMV7: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK-ARMV7: cmp [[SUCCESS]], #0
-; CHECK-ARMV7: bne [[LOOP_BB]]
-; CHECK-ARMV7: dmb ish
-; CHECK-ARMV7: bx lr
+; CHECK-ARMV7: beq [[SUCCESS_BB:\.?LBB.*]]
+; CHECK-ARMV7: ldrex [[OLDVAL]], [r[[ADDR]]]
+; CHECK-ARMV7: cmp [[OLDVAL]], r1
+; CHECK-ARMV7: beq [[LOOP_BB]]
; CHECK-ARMV7: [[FAIL_BB]]:
; CHECK-ARMV7: clrex
; CHECK-ARMV7: bx lr
+; CHECK-ARMV7: [[SUCCESS_BB]]:
+; CHECK-ARMV7: dmb ish
+; CHECK-ARMV7: bx lr
-; CHECK-T2: dmb ish
-; CHECK-T2: [[LOOP_BB:\.?LBB[0-9]+_1]]:
; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK-T2: cmp [[OLDVAL]], r1
-; CHECK-T2: clrexne
-; CHECK-T2: bxne lr
+; CHECK-T2: bne [[FAIL_BB:\.?LBB.*]]
+; CHECK-T2: dmb ish
+; CHECK-T2: [[LOOP_BB:\.?LBB.*]]:
; CHECK-T2: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK-T2: cmp [[SUCCESS]], #0
; CHECK-T2: dmbeq ish
; CHECK-T2: bxeq lr
-; CHECK-T2: b [[LOOP_BB]]
+; CHECK-T2: ldrex [[OLDVAL]], [r[[ADDR]]]
+; CHECK-T2: cmp [[OLDVAL]], r1
+; CHECK-T2: beq [[LOOP_BB]]
+; CHECK-T2: clrex
ret i32 %oldval
}
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
index 81e05ac..283202f 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
@@ -3,28 +3,33 @@
define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
; CHECK-LABEL: test_return:
-; CHECK: dmb ishst
-
-; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
; CHECK: cmp [[LOADED]], r1
; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
-; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cmp [[STATUS]], #0
-; CHECK: bne [[LOOP]]
+; CHECK: dmb ishst
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: dmb ish
-; CHECK: movs r0, #1
-; CHECK: bx lr
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
+; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+
+; CHECK: ldrex [[LOADED]], [r0]
+; CHECK: cmp [[LOADED]], r1
+; CHECK: beq [[LOOP]]
; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: clrex
; CHECK: dmb ish
; CHECK: movs r0, #0
; CHECK: bx lr
+; CHECK: [[SUCCESS]]:
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: dmb ish
+; CHECK: movs r0, #1
+; CHECK: bx lr
+
%pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
%success = extractvalue { i32, i1 } %pair, 1
%conv = zext i1 %success to i32
@@ -34,26 +39,33 @@
define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
; CHECK-LABEL: test_return_bool:
-; CHECK: dmb ishst
; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1
-; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrexb [[LOADED:r[0-9]+]], [r0]
; CHECK: cmp [[LOADED]], [[OLDBYTE]]
; CHECK: bne [[FAIL:LBB[0-9]+_[0-9]+]]
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cmp [[STATUS]], #0
-; CHECK: bne [[LOOP]]
+; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+
+; CHECK: ldrexb [[LOADED]], [r0]
+; CHECK: cmp [[LOADED]], [[OLDBYTE]]
+; CHECK: beq [[LOOP]]
+
; FIXME: this eor is redundant. Need to teach DAG combine that.
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs [[TMP:r[0-9]+]], #1
+; CHECK: [[FAIL]]:
+; CHECK: clrex
+; CHECK: movs [[TMP:r[0-9]+]], #0
; CHECK: eor r0, [[TMP]], #1
; CHECK: bx lr
-; CHECK: [[FAIL]]:
-; CHECK: movs [[TMP:r[0-9]+]], #0
+; CHECK: [[SUCCESS]]:
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: movs [[TMP:r[0-9]+]], #1
; CHECK: eor r0, [[TMP]], #1
; CHECK: bx lr
@@ -67,26 +79,31 @@
define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) {
; CHECK-LABEL: test_conditional:
-; CHECK: dmb ishst
-
-; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
; CHECK: cmp [[LOADED]], r1
; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
-; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
-; CHECK: cmp [[STATUS]], #0
-; CHECK: bne [[LOOP]]
+; CHECK: dmb ishst
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: dmb ish
-; CHECK: b.w _bar
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+
+; CHECK: ldrex [[LOADED]], [r0]
+; CHECK: cmp [[LOADED]], r1
+; CHECK: beq [[LOOP]]
; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: clrex
; CHECK: dmb ish
; CHECK: b.w _baz
+; CHECK: [[SUCCESS]]:
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: dmb ish
+; CHECK: b.w _bar
+
%pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
%success = extractvalue { i32, i1 } %pair, 1
br i1 %success, label %true, label %false
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
index 1eac9c4..4038528 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
@@ -6,11 +6,11 @@
%pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%oldval = extractvalue { i32, i1 } %pair, 0
; CHECK-NEXT: BB#0:
-; CHECK-NEXT: dmb ish
; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r0]
; CHECK-NEXT: cmp [[LOADED]], r1
; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: BB#1:
+; CHECK-NEXT: dmb ish
; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0]
; CHECK-NEXT: cmp [[SUCCESS]], #0
; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]]
@@ -36,13 +36,13 @@
%success = extractvalue { i32, i1 } %pair, 1
; CHECK-NEXT: BB#0:
-; CHECK-NEXT: dmb ish
; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r1]
; CHECK-NEXT: cmp [[LOADED]], r2
; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: BB#1:
-; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1]
+; CHECK-NEXT: dmb ish
; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1]
; CHECK-NEXT: cmp [[SUCCESS]], #0
; CHECK-NEXT: bxne lr
; CHECK-NEXT: dmb ish