[AMDGPU] Preliminary patch for divergence driven instruction selection. Immediate selection predicate changed.

Reviewers: rampitec

Differential Revision: https://reviews.llvm.org/D51734

llvm-svn: 341928
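
The VGPRImm PatLeaf is reimplemented on top of a new
AMDGPUDAGToDAGISel::isVGPRImm() helper: an immediate is now reported as a
VGPR immediate only if at least one of its uses strictly requires a VGPR
operand and cannot be commuted to take the value from an SGPR. Previously
the predicate only checked that no use required an SGPR.

For illustration, the predicate is consumed by materialization patterns of
roughly the following shape (a sketch modeled on the existing VGPRImm users
in SIInstructions.td; these patterns are not part of this patch):

  // Rematerialize a qualifying immediate directly into a VGPR instead of
  // materializing it in an SGPR and copying it over.
  def : Pat <
    (i32 (VGPRImm<(i32 imm)>:$imm)),
    (V_MOV_B32_e32 imm:$imm)
  >;
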
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c95776d..d689d92 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -101,7 +101,7 @@
   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
   bool isNoNanSrc(SDValue N) const;
   bool isInlineImmediate(const SDNode *N) const;
-
+  bool isVGPRImm(const SDNode *N) const;
   bool isUniformBr(const SDNode *N) const;
 
   MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
@@ -2068,6 +2068,62 @@
   return isExtractHiElt(In, Src);
 }
 
+bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
+  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+    return false;
+  }
+  const SIRegisterInfo *SIRI =
+    static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+  const SIInstrInfo *SII =
+    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+  unsigned Limit = 0;
+  bool AllUsesAcceptSReg = true;
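+  // Inspect at most 10 uses of the immediate; beyond that limit, the check
+  // on Limit below conservatively rejects VGPR selection.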
+  for (SDNode::use_iterator U = N->use_begin(), E = N->use_end();
+       Limit < 10 && U != E; ++U, ++Limit) {
+    const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+
+    // If the register class is unknown, it could be an operand that needs
+    // to be an SGPR which we do not know about yet, e.g. an inline asm
+    // constraint, so be conservative and do not choose a VGPR.
+    if (!RC || SIRI->isSGPRClass(RC))
+      return false;
+
+    if (RC != &AMDGPU::VS_32RegClass) {
+      AllUsesAcceptSReg = false;
+      SDNode *User = *U;
+      if (User->isMachineOpcode()) {
+        unsigned Opc = User->getMachineOpcode();
+        const MCInstrDesc &Desc = SII->get(Opc);
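+        // This operand cannot take an SGPR, but a commutable user may be
+        // able to accept the immediate on its other source operand instead.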
+        if (Desc.isCommutable()) {
+          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
+          unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
+          if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
+            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
+            const TargetRegisterClass *CommutedRC =
+                getOperandRegClass(*U, CommutedOpNo);
+            if (CommutedRC == &AMDGPU::VS_32RegClass)
+              AllUsesAcceptSReg = true;
+          }
+        }
+      }
+      // If "AllUsesAcceptSReg == false" so far we haven't suceeded
+      // commuting current user. This means have at least one use
+      // that strictly require VGPR. Thus, we will not attempt to commute
+      // other user instructions.
+      if (!AllUsesAcceptSReg)
+        break;
+    }
+  }
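+  // The immediate belongs in a VGPR only if at least one use strictly
+  // requires a VGPR and could not be commuted to accept an SGPR.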
+  return !AllUsesAcceptSReg && (Limit < 10);
+}
+
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   const AMDGPUTargetLowering& Lowering =
     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9b870fb..df3bed4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1421,10 +1421,18 @@
 // TargetInstrInfo::commuteInstruction uses it.
 bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
                                         unsigned &SrcOpIdx1) const {
-  if (!MI.isCommutable())
+  return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
+}
+
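+// Variant of the above that works on a bare MCInstrDesc, so that it can be
+// used during instruction selection, before a MachineInstr exists.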
+bool SIInstrInfo::findCommutedOpIndices(const MCInstrDesc &Desc,
+                                        unsigned &SrcOpIdx0,
+                                        unsigned &SrcOpIdx1) const {
+  if (!Desc.isCommutable())
     return false;
 
-  unsigned Opc = MI.getOpcode();
+  unsigned Opc = Desc.getOpcode();
   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
   if (Src0Idx == -1)
     return false;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1a7fc02..4685441 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -227,6 +227,11 @@
   bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
                              unsigned &SrcOpIdx2) const override;
 
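+  /// Variant of findCommutedOpIndices that takes only an MCInstrDesc, for
+  /// use before a MachineInstr has been created.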
+  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
+                             unsigned &SrcOpIdx1) const;
+
   bool isBranchOffsetInRange(unsigned BranchOpc,
                              int64_t BrOffset) const override;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 27bbaf3..0a53b0d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -495,24 +495,7 @@
 }]>;
 
 class VGPRImm <dag frag> : PatLeaf<frag, [{
-  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    return false;
-  }
-  const SIRegisterInfo *SIRI =
-      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
-  unsigned Limit = 0;
-  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
-         Limit < 10 && U != E; ++U, ++Limit) {
-    const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
-
-    // If the register class is unknown, it could be an unknown
-    // register class that needs to be an SGPR, e.g. an inline asm
-    // constraint
-    if (!RC || SIRI->isSGPRClass(RC))
-      return false;
-  }
-
-  return Limit < 10;
+  return isVGPRImm(N);
 }]>;
 
 def NegateImm : SDNodeXForm<imm, [{