[AMDGPU] Fix DPP combiner check for exec modification

Summary:
r363675 changed the exec modification helper function, now called
execMayBeModifiedBeforeUse, so that if no UseMI is specified it checks
all instructions in the basic block, even beyond the last use. That
meant that the DPP combiner no longer worked in any basic block that
ended with a control flow instruction, and in particular it didn't work
on code sequences generated by the atomic optimizer.

Fix it by reinstating the old behaviour but in a new helper function
execMayBeModifiedBeforeAnyUse, and limiting the number of instructions
scanned.

Reviewers: arsenm, vpykhtin

Subscribers: kzhuravl, nemanjai, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kbarton, MaskRay, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64393

llvm-svn: 365910
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d855f3f..3474185 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6269,42 +6269,29 @@
 }
 
 bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
-                                      unsigned VReg,
+                                      Register VReg,
                                       const MachineInstr &DefMI,
-                                      const MachineInstr *UseMI) {
+                                      const MachineInstr &UseMI) {
   assert(MRI.isSSA() && "Must be run on SSA");
 
   auto *TRI = MRI.getTargetRegisterInfo();
   auto *DefBB = DefMI.getParent();
 
-  if (UseMI) {
-    // Don't bother searching between blocks, although it is possible this block
-    // doesn't modify exec.
-    if (UseMI->getParent() != DefBB)
-      return true;
-  } else {
-    int NumUse = 0;
-    const int MaxUseScan = 10;
-
-    for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
-      if (UseInst.getParent() != DefBB)
-        return true;
-
-      if (NumUse++ > MaxUseScan)
-        return true;
-    }
-  }
+  // Don't bother searching between blocks, although it is possible this block
+  // doesn't modify exec.
+  if (UseMI.getParent() != DefBB)
+    return true;
 
   const int MaxInstScan = 20;
-  int NumScan = 0;
+  int NumInst = 0;
 
-  // Stop scan at the use if known.
-  auto E = UseMI ? UseMI->getIterator() : DefBB->end();
+  // Stop scan at the use.
+  auto E = UseMI.getIterator();
   for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
     if (I->isDebugInstr())
       continue;
 
-    if (NumScan++ > MaxInstScan)
+    if (++NumInst > MaxInstScan)
       return true;
 
     if (I->modifiesRegister(AMDGPU::EXEC, TRI))
@@ -6313,3 +6300,44 @@
 
   return false;
 }
+
+bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
+                                         Register VReg,
+                                         const MachineInstr &DefMI) {
+  assert(MRI.isSSA() && "Must be run on SSA");
+
+  auto *TRI = MRI.getTargetRegisterInfo();
+  auto *DefBB = DefMI.getParent();
+
+  const int MaxUseInstScan = 10;
+  int NumUseInst = 0;
+
+  for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
+    // Don't bother searching between blocks, although it is possible this block
+    // doesn't modify exec.
+    if (UseInst.getParent() != DefBB)
+      return true;
+
+    if (++NumUseInst > MaxUseInstScan)
+      return true;
+  }
+
+  const int MaxInstScan = 20;
+  int NumInst = 0;
+
+  // Stop scan when we have seen all the uses.
+  for (auto I = std::next(DefMI.getIterator()); ; ++I) {
+    if (I->isDebugInstr())
+      continue;
+
+    if (++NumInst > MaxInstScan)
+      return true;
+
+    if (I->readsRegister(VReg))
+      if (--NumUseInst == 0)
+        return false;
+
+    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
+      return true;
+  }
+}