[ARM] Fix for DLS/LE CodeGen

The expensive buildbots highlighted that the MIR tests were broken.
I've now updated those tests and added --verify-machineinstrs to them.
Doing so also uncovered a couple of bugs in the backend pass, which
are fixed here as well.

llvm-svn: 364323
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index b7f3e5b..6a3709d 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -74,8 +74,8 @@
                 false, false)
 
 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
-  //if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
-    //return false;
+  if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
+    return false;
 
   LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
 
@@ -133,16 +133,15 @@
         Dec = &MI;
       else if (MI.getOpcode() == ARM::t2LoopEnd)
         End = &MI;
+      else if (MI.getDesc().isCall())
+        // TODO: Though the call will require LE to execute again, does this
+        // mean we should revert? Always executing LE hopefully should be
+        // faster than performing a sub,cmp,br or even subs,br.
+        Revert = true;
 
       if (!Dec)
         continue;
 
-      // TODO: Though the call will require LE to execute again, does this
-      // mean we should revert? Always executing LE hopefully should be faster
-      // than performing a sub,cmp,br or even subs,br.
-      if (MI.getDesc().isCall())
-        Revert = true;
-
       // If we find that we load/store LR between LoopDec and LoopEnd, expect
       // that the decremented value has been spilled to the stack. Because
       // this value isn't actually going to be produced until the latch, by LE,
@@ -272,11 +271,13 @@
     MIB.addReg(ARM::LR);
     MIB.addImm(0);
     MIB.addImm(ARMCC::AL);
+    MIB.addReg(ARM::CPSR);
 
     // Create bne
     MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2Bcc));
     MIB.add(End->getOperand(1));  // branch target
     MIB.addImm(ARMCC::NE);        // condition code
+    MIB.addReg(ARM::CPSR);
     End->eraseFromParent();
     Dec->eraseFromParent();
   };