[AMDGPU] Partial revert for the ba447bae7448435c9986eece0811da1423972fdd "Divergence driven ISel. Assign register class for cross block values according to the divergence." that discovered the design flaw leading to several issues that required to be solved before. This change reverts AMDGPU specific changes and keeps common part unaffected. llvm-svn: 362749

commit: 37bd9bd13750a368c0468f0768f29edc37dc540a [log] [tgz]
author: Alexander Timofeev <Alexander.Timofeev@amd.com> Thu Jun 06 21:13:02 2019 +0000
committer: Alexander Timofeev <Alexander.Timofeev@amd.com> Thu Jun 06 21:13:02 2019 +0000
tree: a1389c1689b8a5bd10f976aa66e541bb47ea255f
parent: 169fc2b0209d5574fca0927a707706ea2d5f5a09 [diff] [blame]
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index fb151b4..cc16d93 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

@@ -302,6 +302,52 @@
   return true;
 }
 
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+                               const MachineRegisterInfo &MRI,
+                               const SIRegisterInfo *TRI,
+                               const SIInstrInfo *TII) {
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+      return true;
+  }
+  return false;
+}
+
+static bool phiHasBreakDef(const MachineInstr &PHI,
+                           const MachineRegisterInfo &MRI,
+                           SmallSet<unsigned, 8> &Visited) {
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (Visited.count(Reg))
+      continue;
+
+    Visited.insert(Reg);
+
+    MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+    switch (DefInstr->getOpcode()) {
+    default:
+      break;
+    case AMDGPU::SI_IF_BREAK:
+      return true;
+    case AMDGPU::PHI:
+      if (phiHasBreakDef(*DefInstr, MRI, Visited))
+        return true;
+    }
+  }
+  return false;
+}
+
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+                                          const TargetRegisterInfo &TRI) {
+  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
+       E = MBB.end(); I != E; ++I) {
+    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
+      return true;
+  }
+  return false;
+}
+
 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     const MachineInstr *MoveImm,
                                     const SIInstrInfo *TII,
@@ -363,6 +409,12 @@
   return false;
 }
 
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+                                        const TargetRegisterInfo *TRI) {
+  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
+           return hasTerminatorThatModifiesExec(*MBB, *TRI); });
+}
+
 // Checks if there is potential path From instruction To instruction.
 // If CutOff is specified and it sits in between of that path we ignore
 // a higher portion of the path and report it is not reachable.
@@ -569,77 +621,62 @@
         break;
       }
       case AMDGPU::PHI: {
-        unsigned hasVGPRUses = 0;
-        SetVector<const MachineInstr *> worklist;
-        worklist.insert(&MI);
-        while (!worklist.empty()) {
-          const MachineInstr *Instr = worklist.pop_back_val();
-          unsigned Reg = Instr->getOperand(0).getReg();
-          for (const auto &Use : MRI.use_operands(Reg)) {
-            const MachineInstr *UseMI = Use.getParent();
-            if (UseMI->isCopy() || UseMI->isRegSequence()) {
-              if (UseMI->isCopy() &&
-                  TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
-                  !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
-                hasVGPRUses++;
-              }
-              worklist.insert(UseMI);
-              continue;
-            }
+        unsigned Reg = MI.getOperand(0).getReg();
+        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+          break;
 
-            if (UseMI->isPHI()) {
-              const TargetRegisterClass *UseRC = MRI.getRegClass(Use.getReg());
-              if (!TRI->isSGPRReg(MRI, Use.getReg()) &&
-                  UseRC != &AMDGPU::VReg_1RegClass)
-                hasVGPRUses++;
-              continue;
-            }
+        // We don't need to fix the PHI if the common dominator of the
+        // two incoming blocks terminates with a uniform branch.
+        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
+          MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+          MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
 
-            unsigned OpNo = UseMI->getOperandNo(&Use);
-            const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
-            if (!Desc.isPseudo() && Desc.OpInfo &&
-                OpNo < Desc.getNumOperands() &&
-                Desc.OpInfo[OpNo].RegClass != -1) {
-              const TargetRegisterClass *OpRC =
-                  TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
-              if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
-                  OpRC != &AMDGPU::VS_64RegClass) {
-                hasVGPRUses++;
-              }
-            }
-          }
-        }
-        bool hasVGPRInput = false;
-        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
-          unsigned InputReg = MI.getOperand(i).getReg();
-          MachineInstr *Def = MRI.getVRegDef(InputReg);
-          if (TRI->isVGPR(MRI, InputReg)) {
-            if (Def->isCopy()) {
-              unsigned SrcReg = Def->getOperand(1).getReg();
-              const TargetRegisterClass *RC =
-                  TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
-                                                 : TRI->getPhysRegClass(SrcReg);
-              if (TRI->isSGPRClass(RC))
-                continue;
-            }
-            hasVGPRInput = true;
-            break;
-          } else if (Def->isCopy() &&
-                     TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
-            hasVGPRInput = true;
+          if (!predsHasDivergentTerminator(MBB0, TRI) &&
+              !predsHasDivergentTerminator(MBB1, TRI)) {
+            LLVM_DEBUG(dbgs()
+                       << "Not fixing PHI for uniform branch: " << MI << '\n');
             break;
           }
         }
-        unsigned PHIRes = MI.getOperand(0).getReg();
-        const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
 
-        if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
-            (hasVGPRInput || hasVGPRUses > 1)) {
+        // If a PHI node defines an SGPR and any of its operands are VGPRs,
+        // then we need to move it to the VALU.
+        //
+        // Also, if a PHI node defines an SGPR and has all SGPR operands
+        // we must move it to the VALU, because the SGPR operands will
+        // all end up being assigned the same register, which means
+        // there is a potential for a conflict if different threads take
+        // different control flow paths.
+        //
+        // For Example:
+        //
+        // sgpr0 = def;
+        // ...
+        // sgpr1 = def;
+        // ...
+        // sgpr2 = PHI sgpr0, sgpr1
+        // use sgpr2;
+        //
+        // Will Become:
+        //
+        // sgpr2 = def;
+        // ...
+        // sgpr2 = def;
+        // ...
+        // use sgpr2
+        //
+        // The one exception to this rule is when one of the operands
+        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+        // instruction.  In this case, there we know the program will
+        // never enter the second block (the loop) without entering
+        // the first block (where the condition is computed), so there
+        // is no chance for values to be over-written.
+
+        SmallSet<unsigned, 8> Visited;
+        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
           LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
-          TII->moveToVALU(MI);
-        } else {
-          LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
-          TII->legalizeOperands(MI, MDT);
+          TII->moveToVALU(MI, MDT);
         }
 
         break;
commit	37bd9bd13750a368c0468f0768f29edc37dc540a	[log] [tgz]
author	Alexander Timofeev <Alexander.Timofeev@amd.com>	Thu Jun 06 21:13:02 2019 +0000
committer	Alexander Timofeev <Alexander.Timofeev@amd.com>	Thu Jun 06 21:13:02 2019 +0000
tree	a1389c1689b8a5bd10f976aa66e541bb47ea255f
parent	169fc2b0209d5574fca0927a707706ea2d5f5a09 [diff] [blame]