AMDGPU/GlobalISel: Select G_BRCOND for vcc

llvm-svn: 364795
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5eab5cb..a084b5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -59,8 +59,9 @@
 
 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
 
-static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
-  assert(!TargetRegisterInfo::isPhysicalRegister(Reg));
+static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return Reg == AMDGPU::SCC;
 
   auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
   const TargetRegisterClass *RC =
@@ -76,15 +77,16 @@
   return RB->getID() == AMDGPU::SCCRegBankID;
 }
 
-static bool isVCC(unsigned Reg, const MachineRegisterInfo &MRI,
-                  const SIRegisterInfo &TRI) {
-  assert(!TargetRegisterInfo::isPhysicalRegister(Reg));
+bool AMDGPUInstructionSelector::isVCC(Register Reg,
+                                      const MachineRegisterInfo &MRI) const {
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return Reg == TRI.getVCC();
 
   auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
   const TargetRegisterClass *RC =
       RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
   if (RC) {
-    return RC == TRI.getWaveMaskRegClass() &&
+    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
            MRI.getType(Reg).getSizeInBits() == 1;
   }
 
@@ -106,7 +108,7 @@
     unsigned DstReg = I.getOperand(0).getReg();
 
     // Specially handle scc->vcc copies.
-    if (isVCC(DstReg, MRI, TRI)) {
+    if (isVCC(DstReg, MRI)) {
       const DebugLoc &DL = I.getDebugLoc();
       BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
         .addImm(0)
@@ -991,27 +993,41 @@
   Register CondReg = CondOp.getReg();
   const DebugLoc &DL = I.getDebugLoc();
 
+  unsigned BrOpcode;
+  Register CondPhysReg;
+  const TargetRegisterClass *ConstrainRC;
+
+  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
+  // whether the branch is uniform when selecting the instruction. In
+  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
+  // RegBankSelect knows what it's doing if the branch condition is scc, even
+  // though it currently does not.
   if (isSCC(CondReg, MRI)) {
-    // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
-    // whether the branch is uniform when selecting the instruction. In
-    // GlobalISel, we should push that decision into RegBankSelect. Assume for now
-    // RegBankSelect knows what it's doing if the branch condition is scc, even
-    // though it currently does not.
-    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
-      .addReg(CondReg);
-    if (!MRI.getRegClassOrNull(CondReg)) {
-      const TargetRegisterClass *RC
-        = TRI.getConstrainedRegClassForOperand(CondOp, MRI);
-      MRI.setRegClass(CondReg, RC);
-    }
+    CondPhysReg = AMDGPU::SCC;
+    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
+    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
+  } else if (isVCC(CondReg, MRI)) {
+    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
+    // We sort of know, based on the register bank, that a VCC producer ands
+    // inactive lanes with 0. What if there was a logical operation with vcc
+    // producers in different blocks/with different exec masks?
+    // FIXME: Should scc->vcc copies and with exec?
+    CondPhysReg = TRI.getVCC();
+    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
+    ConstrainRC = TRI.getBoolRC();
+  } else
+    return false;
 
-    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_CBRANCH_SCC1))
-      .addMBB(I.getOperand(1).getMBB());
-    I.eraseFromParent();
-    return true;
-  }
+  if (!MRI.getRegClassOrNull(CondReg))
+    MRI.setRegClass(CondReg, ConstrainRC);
 
-  return false;
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
+    .addReg(CondReg);
+  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
+    .addMBB(I.getOperand(1).getMBB());
+
+  I.eraseFromParent();
+  return true;
 }
 
 bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 6b50ce2..cf0ca72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -17,6 +17,7 @@
 #include "AMDGPUArgumentUsageInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/IR/InstrTypes.h"
 
@@ -59,6 +60,8 @@
   };
 
   bool isInstrUniform(const MachineInstr &MI) const;
+  bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const;
+
   /// tblgen-erated 'select' implementation.
   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;