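Rewrite the PowerPC code generated for __sync_{bool|val}_compare_and_swap
so that l[wd]arx and st[wd]cx. are always executed the same number of times.
The comparison now writes its result to CR1 instead of CR0, because the
st[wd]cx. that follows it reports its own success or failure in CR0.
Keeping the load-reserve and store-conditional paired is reportedly
important for performance.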



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55163 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 2dec8d6..674161e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4056,21 +4056,21 @@
 
     //  loopMBB:
     //   l[wd]arx dest, ptr
-    //   cmp[wd] dest, oldval
-    //   bne- exitMBB
+    //   cmp[wd] CR1, dest, oldval
     //   st[wd]cx. newval, ptr
+    //   bne- CR1, exitMBB
     //   bne- loopMBB
     //   fallthrough --> exitMBB
     BB = loopMBB;
     BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
       .addReg(ptrA).addReg(ptrB);
-    BuildMI(BB, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+    BuildMI(BB, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR1)
       .addReg(oldval).addReg(dest);
-    BuildMI(BB, TII->get(PPC::BCC))
-      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(exitMBB);
     BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
       .addReg(newval).addReg(ptrA).addReg(ptrB);
     BuildMI(BB, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_NE).addReg(PPC::CR1).addMBB(exitMBB);
+    BuildMI(BB, TII->get(PPC::BCC))
       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);    
     BB->addSuccessor(loopMBB);
     BB->addSuccessor(exitMBB);