[PowerPC] Remove redundant CRSET/CRUNSET in custom lowering of known CR bit spills

We lower known CR bit spills (CRSET/CRUNSET) to load and spill the known value,
but forgot to remove the now-redundant CRSET/CRUNSET instructions.

For example, this sequence was used to spill a CRUNSET:
    crclr   4*cr5+lt
    mfocrf  r3,4
    rlwinm  r3,r3,20,0,0
    stw     r3,132(r1)

Custom lowering of known CR bit spills lowers it to:
    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
    li  r3,0
    stw r3,132(r1)

crxor is redundant if there is no use of 4*cr5+lt, so we should remove it.

Differential revision: https://reviews.llvm.org/D67722
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 8664f0e..256788f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3184,6 +3184,11 @@
 // the function label.
 def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
 
+// Pseudo-instruction marked for deletion. When deleting the instruction would
+// cause iterator invalidation in MIR transformation passes, this pseudo can be
+// used instead. It will be removed unconditionally at pre-emit time (prior to
+// branch selection).
+def UNENCODED_NOP: PPCEmitTimePseudo<(outs), (ins), "#UNENCODED_NOP", []>;
 
 // Standard shifts.  These are represented separately from the real shifts above
 // so that we can distinguish between shifts that allow 5-bit and 6-bit shift
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 15c09a5..a4b4bf2 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -163,8 +163,19 @@
     }
 
     bool runOnMachineFunction(MachineFunction &MF) override {
-      if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole)
+      if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
+        // Remove UNENCODED_NOP even when this pass is disabled.
+        // This needs to be done unconditionally so we don't emit zeros
+        // in the instruction stream.
+        SmallVector<MachineInstr *, 4> InstrsToErase;
+        for (MachineBasicBlock &MBB : MF)
+          for (MachineInstr &MI : MBB)
+            if (MI.getOpcode() == PPC::UNENCODED_NOP)
+              InstrsToErase.push_back(&MI);
+        for (MachineInstr *MI : InstrsToErase)
+          MI->eraseFromParent();
         return false;
+      }
       bool Changed = false;
       const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -173,6 +184,10 @@
         Changed |= removeRedundantLIs(MBB, TRI);
         for (MachineInstr &MI : MBB) {
           unsigned Opc = MI.getOpcode();
+          if (Opc == PPC::UNENCODED_NOP) {
+            InstrsToErase.push_back(&MI);
+            continue;
+          }
           // Detect self copies - these can result from running AADB.
           if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
             const MCInstrDesc &MCID = TII->get(Opc);
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 9ec26a1..90193c2 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -747,12 +747,18 @@
   Register SrcReg = MI.getOperand(0).getReg();
 
   // Search up the BB to find the definition of the CR bit.
-  MachineBasicBlock::reverse_iterator Ins;
+  MachineBasicBlock::reverse_iterator Ins = MI;
+  MachineBasicBlock::reverse_iterator Rend = MBB.rend();
+  ++Ins;
   unsigned CRBitSpillDistance = 0;
-  for (Ins = MI; Ins != MBB.rend(); Ins++) {
+  bool SeenUse = false;
+  for (; Ins != Rend; ++Ins) {
     // Definition found.
     if (Ins->modifiesRegister(SrcReg, TRI))
       break;
+    // Use found.
+    if (Ins->readsRegister(SrcReg, TRI))
+      SeenUse = true;
     // Unable to find CR bit definition within maximum search distance.
     if (CRBitSpillDistance == MaxCRBitSpillDist) {
       Ins = MI;
@@ -767,15 +773,18 @@
   if (Ins == MBB.rend())
     Ins = MI;
 
+  bool SpillsKnownBit = false;
   // There is no need to extract the CR bit if its value is already known.
   switch (Ins->getOpcode()) {
   case PPC::CRUNSET:
     BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg)
       .addImm(0);
+    SpillsKnownBit = true;
     break;
   case PPC::CRSET:
     BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg)
       .addImm(-32768);
+    SpillsKnownBit = true;
     break;
   default:
     // We need to move the CR field that contains the CR bit we are spilling.
@@ -803,8 +812,13 @@
                     .addReg(Reg, RegState::Kill),
                     FrameIndex);
 
+  bool KillsCRBit = MI.killsRegister(SrcReg, TRI);
   // Discard the pseudo instruction.
   MBB.erase(II);
+  if (SpillsKnownBit && KillsCRBit && !SeenUse) {
+    Ins->setDesc(TII.get(PPC::UNENCODED_NOP));
+    Ins->RemoveOperand(0);
+  }
 }
 
 void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,