R600/SI: Partially move operand legalization to post-isel hook. Disable the SGPR usage restriction parts of the DAG legalizeOperands. It now should only be doing immediate folding until it can be replaced later. The real legalization work is now done by the other SIInstrInfo::legalizeOperands llvm-svn: 218531

commit: cb0ac3d1fbe29e6c0be2a24c56c06a3b26ce0e39 [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Fri Sep 26 17:54:59 2014 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> Fri Sep 26 17:54:59 2014 +0000
tree: b6e793e2ca4667053b0f88c09d18db8ae606e97d
parent: 92befe7996bd4d20519f481b11b642a513180bc3 [diff]
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 8a3ab46..417356d 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp

@@ -1648,57 +1648,6 @@
   return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
 }
 
-/// \brief Make sure that we don't exeed the number of allowed scalars
-void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
-                                       unsigned RegClass,
-                                       bool &ScalarSlotUsed) const {
-
-  if (!isVSrc(RegClass))
-    return;
-
-  // First map the operands register class to a destination class
-  switch (RegClass) {
-    case AMDGPU::VSrc_32RegClassID:
-    case AMDGPU::VCSrc_32RegClassID:
-      RegClass = AMDGPU::VReg_32RegClassID;
-      break;
-    case AMDGPU::VSrc_64RegClassID:
-    case AMDGPU::VCSrc_64RegClassID:
-      RegClass = AMDGPU::VReg_64RegClassID;
-      break;
-   default:
-    llvm_unreachable("Unknown vsrc reg class");
-  }
-
-  // Nothing to do if they fit naturally
-  if (fitsRegClass(DAG, Operand, RegClass))
-    return;
-
-  // If the scalar slot isn't used yet use it now
-  if (!ScalarSlotUsed) {
-    ScalarSlotUsed = true;
-    return;
-  }
-
-  // This is a conservative aproach. It is possible that we can't determine the
-  // correct register class and copy too often, but better safe than sorry.
-
-  SDNode *Node;
-  // We can't use COPY_TO_REGCLASS with FrameIndex arguments.
-  if (isa<FrameIndexSDNode>(Operand) ||
-      isa<GlobalAddressSDNode>(Operand)) {
-    unsigned Opcode = Operand.getValueType() == MVT::i32 ?
-                      AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
-    Node = DAG.getMachineNode(Opcode, SDLoc(), Operand.getValueType(),
-                              Operand);
-  } else {
-    SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
-    Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
-                              Operand.getValueType(), Operand, RC);
-  }
-  Operand = SDValue(Node, 0);
-}
-
 /// \returns true if \p Node's operands are different from the SDValue list
 /// \p Ops
 static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
@@ -1710,8 +1659,9 @@
   return false;
 }
 
-/// \brief Try to commute instructions and insert copies in order to satisfy the
-/// operand constraints.
+/// TODO: This needs to be removed. It's current primary purpose is to fold
+/// immediates into operands when legal. The legalization parts are redundant
+/// with SIInstrInfo::legalizeOperands which is called in a post-isel hook.
 SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
   // Original encoding (either e32 or e64)
@@ -1784,11 +1734,9 @@
     // Is this a VSrc or SSrc operand?
     unsigned RegClass = Desc->OpInfo[Op].RegClass;
     if (isVSrc(RegClass) || isSSrc(RegClass)) {
-      // Try to fold the immediates
-      if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
-        // Folding didn't work, make sure we don't hit the SReg limit.
-        ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
-      }
+      // Try to fold the immediates. If this ends up with multiple constant bus
+      // uses, it will be legalized later.
+      foldImm(Ops[i], Immediate, ScalarSlotUsed);
       continue;
     }
 
@@ -1938,6 +1886,8 @@
   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
       getTargetMachine().getSubtargetImpl()->getInstrInfo());
 
+  TII->legalizeOperands(MI);
+
   if (TII->isMIMG(MI->getOpcode())) {
     unsigned VReg = MI->getOperand(0).getReg();
     unsigned Writemask = MI->getOperand(1).getImm();

diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h
index f953b48..9cf4dbc 100644
--- a/llvm/lib/Target/R600/SIISelLowering.h
+++ b/llvm/lib/Target/R600/SIISelLowering.h

@@ -47,8 +47,6 @@
                                                 const SDValue &Op) const;
   bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
                     unsigned RegClass) const;
-  void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
-                       unsigned RegClass, bool &ScalarSlotUsed) const;
 
   SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const;
   void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;

diff --git a/llvm/lib/Target/R600/SIInstrFormats.td b/llvm/lib/Target/R600/SIInstrFormats.td
index 8369a0c..c1fc4b3 100644
--- a/llvm/lib/Target/R600/SIInstrFormats.td
+++ b/llvm/lib/Target/R600/SIInstrFormats.td

@@ -42,6 +42,10 @@
   let TSFlags{10} = MUBUF;
   let TSFlags{11} = MTBUF;
   let TSFlags{12} = FLAT;
+
+  // Most instructions require adjustments after selection to satisfy
+  // operand requirements.
+  let hasPostISelHook = 1;
 }
 
 class Enc32 {

diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp
index fb5dad1..ed8d979 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/llvm/lib/Target/R600/SIInstrInfo.cpp

@@ -1394,20 +1394,39 @@
 
     int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
 
-    // First we need to consider the instruction's operand requirements before
-    // legalizing. Some operands are required to be SGPRs, but we are still
-    // bound by the constant bus requirement to only use one.
-    //
-    // If the operand's class is an SGPR, we can never move it.
-    for (unsigned i = 0; i < 3; ++i) {
-      int Idx = VOP3Idx[i];
-      if (Idx == -1)
-        break;
+    for (const MachineOperand &MO : MI->implicit_operands()) {
+      // We only care about reads.
+      if (MO.isDef())
+        continue;
 
-      if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
-        SGPRReg = MI->getOperand(Idx).getReg();
+      if (MO.getReg() == AMDGPU::VCC) {
+        SGPRReg = AMDGPU::VCC;
         break;
       }
+
+      if (MO.getReg() == AMDGPU::FLAT_SCR) {
+        SGPRReg = AMDGPU::FLAT_SCR;
+        break;
+      }
+    }
+
+
+    if (SGPRReg == AMDGPU::NoRegister) {
+      // First we need to consider the instruction's operand requirements before
+      // legalizing. Some operands are required to be SGPRs, but we are still
+      // bound by the constant bus requirement to only use one.
+      //
+      // If the operand's class is an SGPR, we can never move it.
+      for (unsigned i = 0; i < 3; ++i) {
+        int Idx = VOP3Idx[i];
+        if (Idx == -1)
+          break;
+
+        if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
+          SGPRReg = MI->getOperand(Idx).getReg();
+          break;
+        }
+      }
     }
 
     for (unsigned i = 0; i < 3; ++i) {

diff --git a/llvm/test/CodeGen/R600/fneg.f64.ll b/llvm/test/CodeGen/R600/fneg.f64.ll
index 61d9513..f0b341b 100644
--- a/llvm/test/CodeGen/R600/fneg.f64.ll
+++ b/llvm/test/CodeGen/R600/fneg.f64.ll

@@ -50,7 +50,7 @@
 ; SI-LABEL: @fneg_fold
 ; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-NOT: XOR
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], {{v\[[0-9]+:[0-9]+\]}}
+; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
 define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
   %fsub = fsub double -0.0, %in
   %fmul = fmul double %fsub, %in

diff --git a/llvm/test/CodeGen/R600/fneg.ll b/llvm/test/CodeGen/R600/fneg.ll
index 72cd15c..8631301 100644
--- a/llvm/test/CodeGen/R600/fneg.ll
+++ b/llvm/test/CodeGen/R600/fneg.ll

@@ -59,7 +59,7 @@
 ; FUNC-LABEL: @fneg_fold
 ; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 ; SI-NOT: XOR
-; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], v{{[0-9]+}}
+; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
 define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
   %fsub = fsub float -0.0, %in
   %fmul = fmul float %fsub, %in

diff --git a/llvm/test/CodeGen/R600/rotl.ll b/llvm/test/CodeGen/R600/rotl.ll
index 8c86fb5..a9dee8c 100644
--- a/llvm/test/CodeGen/R600/rotl.ll
+++ b/llvm/test/CodeGen/R600/rotl.ll

@@ -8,7 +8,7 @@
 
 ; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
 ; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
-; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
+; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
 define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
 entry:
   %0 = shl i32 %x, %y

diff --git a/llvm/test/CodeGen/R600/seto.ll b/llvm/test/CodeGen/R600/seto.ll
index cc942c1..eb1176f 100644
--- a/llvm/test/CodeGen/R600/seto.ll
+++ b/llvm/test/CodeGen/R600/seto.ll

@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
 
-;CHECK-LABEL: @main
-;CHECK: V_CMP_O_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
-
+; CHECK-LABEL: @main
+; CHECK: V_CMP_O_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
 define void @main(float %p) {
 main_body:
   %c = fcmp oeq float %p, %p

diff --git a/llvm/test/CodeGen/R600/setuo.ll b/llvm/test/CodeGen/R600/setuo.ll
index 33007fc..a78e8e6 100644
--- a/llvm/test/CodeGen/R600/setuo.ll
+++ b/llvm/test/CodeGen/R600/setuo.ll

@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
 
-;CHECK-LABEL: @main
-;CHECK: V_CMP_U_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
-
+; CHECK-LABEL: @main
+; CHECK: V_CMP_U_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
 define void @main(float %p) {
 main_body:
   %c = fcmp une float %p, %p
commit	cb0ac3d1fbe29e6c0be2a24c56c06a3b26ce0e39	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Fri Sep 26 17:54:59 2014 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	Fri Sep 26 17:54:59 2014 +0000
tree	b6e793e2ca4667053b0f88c09d18db8ae606e97d
parent	92befe7996bd4d20519f481b11b642a513180bc3 [diff]