Rewrite ppc code generated for __sync_{bool|val}_compare_and_swap
so that lwarx and stwcx are always executed the same number of times.
This is important for performance, I'm told.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55163 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 2dec8d6..674161e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4056,21 +4056,21 @@
// loopMBB:
// l[wd]arx dest, ptr
- // cmp[wd] dest, oldval
- // bne- exitMBB
+ // cmp[wd] CR1, dest, oldval
// st[wd]cx. newval, ptr
+ // bne- CR1, exitMBB
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
.addReg(ptrA).addReg(ptrB);
- BuildMI(BB, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ BuildMI(BB, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR1)
.addReg(oldval).addReg(dest);
- BuildMI(BB, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(exitMBB);
BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
.addReg(newval).addReg(ptrA).addReg(ptrB);
BuildMI(BB, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR1).addMBB(exitMBB);
+ BuildMI(BB, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);