AMDGPU: Make exec mask optimizations more resistant to block splits

Also improve the check for SALU instructions to ignore implicit_def and
other fake instructions.
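
As a rough illustration (a hand-written MIR-style sketch, not a test from
this change; the block and register names are invented), the pass can now
treat the two exec-restore (endcf) instructions below as adjacent, even
though a block split and an IMPLICIT_DEF sit between them:

  bb.1:
    ; sketch only: %save0/%save1 stand for saved exec masks
    $exec = S_OR_B64 $exec, %save0:sreg_64, implicit-def $scc
    ; fall through to the single successor

  bb.2:
    %dead:sreg_64 = IMPLICIT_DEF   ; "fake" instruction, now skipped
    $exec = S_OR_B64 $exec, %save1:sreg_64, implicit-def $scc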

llvm-svn: 357170
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index f5724a7..df06ba5 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -33,10 +33,22 @@
 namespace {
 
 class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
+private:
+  const SIRegisterInfo *TRI;
+  const SIInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+
+public:
+  MachineBasicBlock::iterator skipIgnoreExecInsts(
+    MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const;
+
+  MachineBasicBlock::iterator skipIgnoreExecInstsTrivialSucc(
+    MachineBasicBlock *&MBB,
+    MachineBasicBlock::iterator It) const;
+
 public:
   static char ID;
 
-public:
   SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
     initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
   }
@@ -102,6 +114,45 @@
   return SaveExecInst;
 }
 
+/// Skip over instructions that don't care about the exec mask.
+MachineBasicBlock::iterator SIOptimizeExecMaskingPreRA::skipIgnoreExecInsts(
+  MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const {
+  for ( ; I != E; ++I) {
+    if (TII->mayReadEXEC(*MRI, *I))
+      break;
+  }
+
+  return I;
+}
+
+// Skip to the next instruction, ignoring debug instructions, and trivial block
+// boundaries (blocks that have one (typically fallthrough) successor, and the
+// successor has one predecessor).
+MachineBasicBlock::iterator
+SIOptimizeExecMaskingPreRA::skipIgnoreExecInstsTrivialSucc(
+  MachineBasicBlock *&MBB,
+  MachineBasicBlock::iterator It) const {
+
+  do {
+    It = skipIgnoreExecInsts(It, MBB->end());
+    if (It != MBB->end() || MBB->succ_size() != 1)
+      break;
+
+    // If there is one trivial successor, advance to the next block.
+    MachineBasicBlock *Succ = *MBB->succ_begin();
+
+    // TODO: Is this really necessary?
+    if (!MBB->isLayoutSuccessor(Succ))
+      break;
+
+    It = Succ->begin();
+    MBB = Succ;
+  } while (true);
+
+  return It;
+}
+
+
 // Optimize sequence
 //    %sel = V_CNDMASK_B32_e64 0, 1, %cc
 //    %cmp = V_CMP_NE_U32 1, %1
@@ -227,8 +278,10 @@
     return false;
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  const SIRegisterInfo *TRI = ST.getRegisterInfo();
-  const SIInstrInfo *TII = ST.getInstrInfo();
+  TRI = ST.getRegisterInfo();
+  TII = ST.getInstrInfo();
+  MRI = &MF.getRegInfo();
+
   MachineRegisterInfo &MRI = MF.getRegInfo();
   LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
   DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
@@ -313,25 +366,9 @@
     if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
       continue;
 
-    const MachineBasicBlock* Succ = *MBB.succ_begin();
-    if (!MBB.isLayoutSuccessor(Succ))
-      continue;
-
-    auto I = std::next(Lead);
-
-    for ( ; I != E; ++I) {
-      if (I->isDebugInstr())
-        continue;
-
-      if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI))
-        break;
-    }
-
-    if (I != E)
-      continue;
-
-    auto NextLead = skipDebugInstructionsForward(Succ->begin(), Succ->end());
-    if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) ||
+    MachineBasicBlock *TmpMBB = &MBB;
+    auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead));
+    if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI) ||
         !getOrExecSource(*NextLead, *TII, MRI))
       continue;