R600: Add FetchInst bit to instruction defs to denote vertex/tex instructions

v2 [Vincent Lejeune]: Split FetchInst into separate VTXInst/TEXInst bits, queried via usesVertexCache/usesTextureCache

llvm-svn: 180755
diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
index 0f356a1..a7e1d7b 100644
--- a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -33,6 +33,7 @@
   DefaultSize[0] = 64;
   DefaultSize[1] = 1;
   DefaultSize[2] = 1;
+  HasVertexCache = false;
   ParseSubtargetFeatures(GPU, FS);
   DevName = GPU;
   Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
@@ -53,6 +54,10 @@
   return Is64bit;
 }
 bool
+AMDGPUSubtarget::hasVertexCache() const {
+  return HasVertexCache;
+}
+bool
 AMDGPUSubtarget::isTargetELF() const {
   return false;
 }
diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h
index 1973fc6..b6501a4 100644
--- a/llvm/lib/Target/R600/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h
@@ -36,6 +36,7 @@
   bool Is32on64bit;
   bool DumpCode;
   bool R600ALUInst;
+  bool HasVertexCache;
 
   InstrItineraryData InstrItins;
 
@@ -48,6 +49,7 @@
 
   bool isOverride(AMDGPUDeviceInfo::Caps) const;
   bool is64bit() const;
+  bool hasVertexCache() const;
 
   // Helper functions to simplify if statements
   bool isTargetELF() const;
diff --git a/llvm/lib/Target/R600/AMDILBase.td b/llvm/lib/Target/R600/AMDILBase.td
index c12cedc..e221110 100644
--- a/llvm/lib/Target/R600/AMDILBase.td
+++ b/llvm/lib/Target/R600/AMDILBase.td
@@ -74,6 +74,10 @@
         "false",
         "Older version of ALU instructions encoding.">;
 
+def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
+        "HasVertexCache",
+        "true",
+        "Specify use of dedicated vertex cache.">;
 
 //===----------------------------------------------------------------------===//
 // Register File, Calling Conv, Instruction Descriptions
diff --git a/llvm/lib/Target/R600/Processors.td b/llvm/lib/Target/R600/Processors.td
index c2cc63a..abefba2 100644
--- a/llvm/lib/Target/R600/Processors.td
+++ b/llvm/lib/Target/R600/Processors.td
@@ -13,23 +13,39 @@
 
 class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
 : Processor<Name, itin, Features>;
-def : Proc<"",           R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"r600",       R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"rs880",      R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"rv670",      R600_EG_Itin, [FeatureR600ALUInst, FeatureFP64]>;
-def : Proc<"rv710",      R600_EG_Itin, []>;
-def : Proc<"rv730",      R600_EG_Itin, []>;
-def : Proc<"rv770",      R600_EG_Itin, [FeatureFP64]>;
-def : Proc<"cedar",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"sumo",       R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"redwood",    R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"juniper",    R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cypress",    R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"barts",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"turks",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"caicos",     R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cayman",     R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"SI",         SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"",           R600_EG_Itin,
+    [FeatureR600ALUInst, FeatureVertexCache]>;
+def : Proc<"r600",       R600_EG_Itin,
+    [FeatureR600ALUInst, FeatureVertexCache]>;
+def : Proc<"rs880",      R600_EG_Itin,
+    [FeatureR600ALUInst]>;
+def : Proc<"rv670",      R600_EG_Itin,
+    [FeatureR600ALUInst, FeatureFP64, FeatureVertexCache]>;
+def : Proc<"rv710",      R600_EG_Itin,
+    [FeatureVertexCache]>;
+def : Proc<"rv730",      R600_EG_Itin,
+    [FeatureVertexCache]>;
+def : Proc<"rv770",      R600_EG_Itin,
+    [FeatureFP64, FeatureVertexCache]>;
+def : Proc<"cedar",      R600_EG_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"redwood",    R600_EG_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"sumo",       R600_EG_Itin,
+    [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper",    R600_EG_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"cypress",    R600_EG_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureFP64, FeatureVertexCache]>;
+def : Proc<"barts",      R600_EG_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"turks",      R600_EG_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"caicos",     R600_EG_Itin,
+    [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman",     R600_EG_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"SI",         SI_Itin, [Feature64BitPtr, FeatureFP64]>;
 def : Proc<"tahiti",     SI_Itin, [Feature64BitPtr, FeatureFP64]>;
 def : Proc<"pitcairn",   SI_Itin, [Feature64BitPtr, FeatureFP64]>;
 def : Proc<"verde",      SI_Itin, [Feature64BitPtr, FeatureFP64]>;
diff --git a/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 94736ad..f978612 100644
--- a/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -32,6 +32,7 @@
 private:
   enum ControlFlowInstruction {
     CF_TC,
+    CF_VC,
     CF_CALL_FS,
     CF_WHILE_LOOP,
     CF_END_LOOP,
@@ -48,39 +49,6 @@
   unsigned MaxFetchInst;
   const AMDGPUSubtarget &ST;
 
-  bool isFetch(const MachineInstr *MI) const {
-    switch (MI->getOpcode()) {
-    case AMDGPU::TEX_VTX_CONSTBUF:
-    case AMDGPU::TEX_VTX_TEXBUF:
-    case AMDGPU::TEX_LD:
-    case AMDGPU::TEX_GET_TEXTURE_RESINFO:
-    case AMDGPU::TEX_GET_GRADIENTS_H:
-    case AMDGPU::TEX_GET_GRADIENTS_V:
-    case AMDGPU::TEX_SET_GRADIENTS_H:
-    case AMDGPU::TEX_SET_GRADIENTS_V:
-    case AMDGPU::TEX_SAMPLE:
-    case AMDGPU::TEX_SAMPLE_C:
-    case AMDGPU::TEX_SAMPLE_L:
-    case AMDGPU::TEX_SAMPLE_C_L:
-    case AMDGPU::TEX_SAMPLE_LB:
-    case AMDGPU::TEX_SAMPLE_C_LB:
-    case AMDGPU::TEX_SAMPLE_G:
-    case AMDGPU::TEX_SAMPLE_C_G:
-    case AMDGPU::TXD:
-    case AMDGPU::TXD_SHADOW:
-    case AMDGPU::VTX_READ_GLOBAL_8_eg:
-    case AMDGPU::VTX_READ_GLOBAL_32_eg:
-    case AMDGPU::VTX_READ_GLOBAL_128_eg:
-    case AMDGPU::VTX_READ_PARAM_8_eg:
-    case AMDGPU::VTX_READ_PARAM_16_eg:
-    case AMDGPU::VTX_READ_PARAM_32_eg:
-    case AMDGPU::VTX_READ_PARAM_128_eg:
-     return true;
-    default:
-      return false;
-    }
-  }
-
   bool IsTrivialInst(MachineInstr *MI) const {
     switch (MI->getOpcode()) {
     case AMDGPU::KILL:
@@ -98,6 +66,9 @@
     case CF_TC:
       Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
       break;
+    case CF_VC:
+      Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
+      break;
     case CF_CALL_FS:
       Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
       break;
@@ -139,17 +110,19 @@
       unsigned CfAddress) const {
     MachineBasicBlock::iterator ClauseHead = I;
     unsigned AluInstCount = 0;
+    bool IsTex = TII->usesTextureCache(ClauseHead);
     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
       if (IsTrivialInst(I))
         continue;
-      if (!isFetch(I))
+      if ((IsTex && !TII->usesTextureCache(I)) ||
+          (!IsTex && !TII->usesVertexCache(I)))
         break;
       AluInstCount ++;
       if (AluInstCount > MaxFetchInst)
         break;
     }
     BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
-        getHWInstrDesc(CF_TC))
+        getHWInstrDesc(IsTex?CF_TC:CF_VC))
         .addImm(CfAddress) // ADDR
         .addImm(AluInstCount); // COUNT
     return I;
@@ -211,7 +184,7 @@
       }
       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
-        if (isFetch(I)) {
+        if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
           DEBUG(dbgs() << CfCount << ":"; I->dump(););
           I = MakeFetchClause(MBB, I, 0);
           CfCount++;
diff --git a/llvm/lib/Target/R600/R600Defines.h b/llvm/lib/Target/R600/R600Defines.h
index 16cfcf5..bdda232 100644
--- a/llvm/lib/Target/R600/R600Defines.h
+++ b/llvm/lib/Target/R600/R600Defines.h
@@ -39,7 +39,9 @@
     //FlagOperand bits 7, 8
     NATIVE_OPERANDS = (1 << 9),
     OP1 = (1 << 10),
-    OP2 = (1 << 11)
+    OP2 = (1 << 11),
+    VTX_INST  = (1 << 12),
+    TEX_INST = (1 << 13)
   };
 }
 
diff --git a/llvm/lib/Target/R600/R600InstrInfo.cpp b/llvm/lib/Target/R600/R600InstrInfo.cpp
index b232188..1800d20 100644
--- a/llvm/lib/Target/R600/R600InstrInfo.cpp
+++ b/llvm/lib/Target/R600/R600InstrInfo.cpp
@@ -29,7 +29,8 @@
 
 R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
   : AMDGPUInstrInfo(tm),
-    RI(tm, *this)
+    RI(tm, *this),
+    ST(tm.getSubtarget<AMDGPUSubtarget>())
   { }
 
 const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
@@ -139,6 +140,35 @@
           (TargetFlags & R600_InstFlag::OP3));
 }
 
+/// \returns true if \p Opcode is flagged VTX_INST and the subtarget reports
+/// a vertex cache.
+bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
+  if (!ST.hasVertexCache())
+    return false;
+  return get(Opcode).TSFlags & R600_InstFlag::VTX_INST;
+}
+
+/// Convenience overload that queries \p MI by its opcode.
+bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
+  const unsigned Opcode = MI->getOpcode();
+  return usesVertexCache(Opcode);
+}
+
+/// \returns true if \p Opcode is flagged TEX_INST, or is flagged VTX_INST
+/// while the subtarget reports no vertex cache.
+bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
+  const uint64_t Flags = get(Opcode).TSFlags;
+  if (Flags & R600_InstFlag::TEX_INST)
+    return true;
+  return !ST.hasVertexCache() && (Flags & R600_InstFlag::VTX_INST);
+}
+
+/// Convenience overload that queries \p MI by its opcode.
+bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
+  const unsigned Opcode = MI->getOpcode();
+  return usesTextureCache(Opcode);
+}
+
 bool
 R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
     const {
diff --git a/llvm/lib/Target/R600/R600InstrInfo.h b/llvm/lib/Target/R600/R600InstrInfo.h
index dbae900..2146788 100644
--- a/llvm/lib/Target/R600/R600InstrInfo.h
+++ b/llvm/lib/Target/R600/R600InstrInfo.h
@@ -33,6 +33,7 @@
   class R600InstrInfo : public AMDGPUInstrInfo {
   private:
   const R600RegisterInfo RI;
+  const AMDGPUSubtarget &ST;
 
   int getBranchInstr(const MachineOperand &op) const;
 
@@ -53,6 +54,11 @@
   /// \returns true if this \p Opcode represents an ALU instruction.
   bool isALUInstr(unsigned Opcode) const;
 
+  bool usesVertexCache(unsigned Opcode) const;
+  bool usesVertexCache(const MachineInstr *MI) const;
+  bool usesTextureCache(unsigned Opcode) const;
+  bool usesTextureCache(const MachineInstr *MI) const;
+
   bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
   bool canBundle(const std::vector<MachineInstr *> &) const;
 
diff --git a/llvm/lib/Target/R600/R600Instructions.td b/llvm/lib/Target/R600/R600Instructions.td
index f8e105c..18760cb 100644
--- a/llvm/lib/Target/R600/R600Instructions.td
+++ b/llvm/lib/Target/R600/R600Instructions.td
@@ -25,6 +25,8 @@
   bit Op1 = 0;
   bit Op2 = 0;
   bit HasNativeOperands = 0;
+  bit VTXInst = 0;
+  bit TEXInst = 0;
 
   let Namespace = "AMDGPU";
   let OutOperandList = outs;
@@ -43,6 +45,8 @@
   let TSFlags{9} = HasNativeOperands;
   let TSFlags{10} = Op1;
   let TSFlags{11} = Op2;
+  let TSFlags{12} = VTXInst;
+  let TSFlags{13} = TEXInst;
 }
 
 class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -478,6 +482,8 @@
     let COORD_TYPE_Y = 0;
     let COORD_TYPE_Z = 0;
     let COORD_TYPE_W = 0;
+
+    let TEXInst = 1;
   }
 
 } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
@@ -1784,6 +1790,8 @@
   // VTX_WORD3 (Padding)
   //
   // Inst{127-96} = 0;
+
+  let VTXInst = 1;
 }
 
 class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
@@ -2012,15 +2020,17 @@
   (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
   [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))], NullALU> {
->;
+  let TEXInst = 1;
+}
 
 def TXD_SHADOW: InstR600 <
   (outs R600_Reg128:$dst),
   (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
   [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], NullALU
->;
-
+> {
+  let TEXInst = 1;
+}
 } // End isPseudo = 1
 } // End usesCustomInserter = 1
 
@@ -2106,6 +2116,7 @@
 // VTX_WORD3 (Padding)
 //
 // Inst{127-96} = 0;
+  let VTXInst = 1;
 }
 
 def TEX_VTX_TEXBUF:
@@ -2159,6 +2170,7 @@
 // VTX_WORD3 (Padding)
 //
 // Inst{127-96} = 0;
+  let VTXInst = 1;
 }