[AMDGPU] Divergence driven ISel. Assign register class for cross block values according to the divergence.

    Details: To make instruction selection really divergence driven it is necessary to assign
             the correct register classes to the cross block values beforehand. For the divergent targets
             same value type requires different register classes dependent on the value divergence.

    Reviewers: rampitec, nhaehnle

    Differential Revision: https://reviews.llvm.org/D59990

    This commit was reverted because of the build failure.
    The reason was mlformed patch.
    Build failure fixed.

llvm-svn: 361741
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 94b1e63..8ad7a52 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -302,52 +302,6 @@
   return true;
 }
 
-static bool phiHasVGPROperands(const MachineInstr &PHI,
-                               const MachineRegisterInfo &MRI,
-                               const SIRegisterInfo *TRI,
-                               const SIInstrInfo *TII) {
-  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
-    unsigned Reg = PHI.getOperand(i).getReg();
-    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
-      return true;
-  }
-  return false;
-}
-
-static bool phiHasBreakDef(const MachineInstr &PHI,
-                           const MachineRegisterInfo &MRI,
-                           SmallSet<unsigned, 8> &Visited) {
-  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
-    unsigned Reg = PHI.getOperand(i).getReg();
-    if (Visited.count(Reg))
-      continue;
-
-    Visited.insert(Reg);
-
-    MachineInstr *DefInstr = MRI.getVRegDef(Reg);
-    switch (DefInstr->getOpcode()) {
-    default:
-      break;
-    case AMDGPU::SI_IF_BREAK:
-      return true;
-    case AMDGPU::PHI:
-      if (phiHasBreakDef(*DefInstr, MRI, Visited))
-        return true;
-    }
-  }
-  return false;
-}
-
-static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
-                                          const TargetRegisterInfo &TRI) {
-  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
-       E = MBB.end(); I != E; ++I) {
-    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
-      return true;
-  }
-  return false;
-}
-
 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     const MachineInstr *MoveImm,
                                     const SIInstrInfo *TII,
@@ -409,12 +363,6 @@
   return false;
 }
 
-static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
-                                        const TargetRegisterInfo *TRI) {
-  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
-           return hasTerminatorThatModifiesExec(*MBB, *TRI); });
-}
-
 // Checks if there is potential path From instruction To instruction.
 // If CutOff is specified and it sits in between of that path we ignore
 // a higher portion of the path and report it is not reachable.
@@ -621,63 +569,73 @@
         break;
       }
       case AMDGPU::PHI: {
-        unsigned Reg = MI.getOperand(0).getReg();
-        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
-          break;
+        unsigned hasVGPRUses = 0;
+        SetVector<const MachineInstr *> worklist;
+        worklist.insert(&MI);
+        while (!worklist.empty()) {
+          const MachineInstr *Instr = worklist.pop_back_val();
+          unsigned Reg = Instr->getOperand(0).getReg();
+          for (const auto &Use : MRI.use_operands(Reg)) {
+            const MachineInstr *UseMI = Use.getParent();
+            if (UseMI->isCopy() || UseMI->isRegSequence()) {
+              if (UseMI->isCopy() &&
+                  TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
+                  !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
+                hasVGPRUses++;
+              }
+              worklist.insert(UseMI);
+              continue;
+            }
 
-        // We don't need to fix the PHI if the common dominator of the
-        // two incoming blocks terminates with a uniform branch.
-        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
-        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
-          MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
-          MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+            if (UseMI->isPHI()) {
+              if (!TRI->isSGPRReg(MRI, Use.getReg()))
+                hasVGPRUses++;
+              continue;
+            }
 
-          if (!predsHasDivergentTerminator(MBB0, TRI) &&
-              !predsHasDivergentTerminator(MBB1, TRI)) {
-            LLVM_DEBUG(dbgs()
-                       << "Not fixing PHI for uniform branch: " << MI << '\n');
+            unsigned OpNo = UseMI->getOperandNo(&Use);
+            const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
+            if (Desc.OpInfo && Desc.OpInfo[OpNo].RegClass != -1) {
+              const TargetRegisterClass *OpRC =
+                  TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
+              if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
+                  OpRC != &AMDGPU::VS_64RegClass) {
+                hasVGPRUses++;
+              }
+            }
+          }
+        }
+        bool hasVGPRInput = false;
+        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+          unsigned InputReg = MI.getOperand(i).getReg();
+          MachineInstr *Def = MRI.getVRegDef(InputReg);
+          if (TRI->isVGPR(MRI, InputReg)) {
+            if (Def->isCopy()) {
+              unsigned SrcReg = Def->getOperand(1).getReg();
+              const TargetRegisterClass *RC =
+                  TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
+                                                 : TRI->getPhysRegClass(SrcReg);
+              if (TRI->isSGPRClass(RC))
+                continue;
+            }
+            hasVGPRInput = true;
+            break;
+          } else if (Def->isCopy() &&
+                     TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
+            hasVGPRInput = true;
             break;
           }
         }
+        unsigned PHIRes = MI.getOperand(0).getReg();
+        const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
 
-        // If a PHI node defines an SGPR and any of its operands are VGPRs,
-        // then we need to move it to the VALU.
-        //
-        // Also, if a PHI node defines an SGPR and has all SGPR operands
-        // we must move it to the VALU, because the SGPR operands will
-        // all end up being assigned the same register, which means
-        // there is a potential for a conflict if different threads take
-        // different control flow paths.
-        //
-        // For Example:
-        //
-        // sgpr0 = def;
-        // ...
-        // sgpr1 = def;
-        // ...
-        // sgpr2 = PHI sgpr0, sgpr1
-        // use sgpr2;
-        //
-        // Will Become:
-        //
-        // sgpr2 = def;
-        // ...
-        // sgpr2 = def;
-        // ...
-        // use sgpr2
-        //
-        // The one exception to this rule is when one of the operands
-        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
-        // instruction.  In this case, there we know the program will
-        // never enter the second block (the loop) without entering
-        // the first block (where the condition is computed), so there
-        // is no chance for values to be over-written.
-
-        SmallSet<unsigned, 8> Visited;
-        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
-          LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
-          TII->moveToVALU(MI, MDT);
+        if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
+            (hasVGPRInput || hasVGPRUses > 1)) {
+          TII->moveToVALU(MI);
+        } else {
+          TII->legalizeOperands(MI, MDT);
         }
+
         break;
       }
       case AMDGPU::REG_SEQUENCE: