R600/SI: Fix bug with v_interp_p1_f32 instructions on 16 bank lds chips

On chips with 16 LDS banks, the src and dst registers of v_interp_p1_f32 cannot be the same, so $dst is marked @earlyclobber for those chips.

llvm-svn: 238147
diff --git a/llvm/lib/Target/R600/AMDGPU.td b/llvm/lib/Target/R600/AMDGPU.td
index d70c167..2e7e39a 100644
--- a/llvm/lib/Target/R600/AMDGPU.td
+++ b/llvm/lib/Target/R600/AMDGPU.td
@@ -132,6 +132,15 @@
 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
 
+class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
+      "ldsbankcount"#Value,
+      "LDSBankCount",
+      !cast<string>(Value),
+      "The number of LDS banks per compute unit.">;
+
+def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
+def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
+
 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
         "localmemorysize"#Value,
         "LocalMemorySize",
@@ -189,7 +198,8 @@
 
 def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
         [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
-         FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>;
+         FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
+         FeatureLDSBankCount32]>;
 
 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
         [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
@@ -199,7 +209,7 @@
 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
         [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
          FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
-         FeatureGCN3Encoding, FeatureCIInsts]>;
+         FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
 
 //===----------------------------------------------------------------------===//
 
diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
index f78e78a..5288866 100644
--- a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -72,6 +72,7 @@
       WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
       EnableVGPRSpilling(false), SGPRInitBug(false),
       IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false),
+      LDSBankCount(0),
       FrameLowering(TargetFrameLowering::StackGrowsUp,
                     64 * 16, // Maximum stack alignment (long16)
                     0),
diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h
index 57a084e..b262cdf 100644
--- a/llvm/lib/Target/R600/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h
@@ -76,6 +76,7 @@
   bool GCN3Encoding;
   bool CIInsts;
   bool FeatureDisable;
+  int LDSBankCount;
 
   AMDGPUFrameLowering FrameLowering;
   std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -228,6 +229,10 @@
     return SGPRInitBug;
   }
 
+  int getLDSBankCount() const {
+    return LDSBankCount;
+  }
+
   unsigned getAmdKernelCodeChipID() const;
 
   bool enableMachineScheduler() const override {
diff --git a/llvm/lib/Target/R600/Processors.td b/llvm/lib/Target/R600/Processors.td
index 82c6d13..c0ffede 100644
--- a/llvm/lib/Target/R600/Processors.td
+++ b/llvm/lib/Target/R600/Processors.td
@@ -103,17 +103,24 @@
 // Sea Islands
 //===----------------------------------------------------------------------===//
 
-def : ProcessorModel<"bonaire",    SIQuarterSpeedModel, [FeatureSeaIslands]>;
-
-def : ProcessorModel<"kabini",     SIQuarterSpeedModel, [FeatureSeaIslands]>;
-
-def : ProcessorModel<"kaveri",     SIQuarterSpeedModel, [FeatureSeaIslands]>;
-
-def : ProcessorModel<"hawaii", SIFullSpeedModel,
-  [FeatureSeaIslands, FeatureFastFMAF32]
+def : ProcessorModel<"bonaire",    SIQuarterSpeedModel,
+  [FeatureSeaIslands, FeatureLDSBankCount32]
 >;
 
-def : ProcessorModel<"mullins",    SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"kabini",     SIQuarterSpeedModel,
+  [FeatureSeaIslands, FeatureLDSBankCount16]
+>;
+
+def : ProcessorModel<"kaveri",     SIQuarterSpeedModel,
+  [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"hawaii", SIFullSpeedModel,
+  [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"mullins",    SIQuarterSpeedModel,
+  [FeatureSeaIslands, FeatureLDSBankCount16]>;
 
 //===----------------------------------------------------------------------===//
 // Volcanic Islands
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 875f9c9..15c2f3e 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -34,6 +34,9 @@
 
 def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
 
+def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
+def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
+
 def SWaitMatchClass : AsmOperandClass {
   let Name = "SWaitCnt";
   let RenderMethod = "addImmOperands";
@@ -1436,13 +1439,27 @@
 let Uses = [M0] in {
 
 // FIXME: Specify SchedRW for VINTRP insturctions.
-defm V_INTERP_P1_F32 : VINTRP_m <
-  0x00000000, 
+
+multiclass V_INTERP_P1_F32_m : VINTRP_m <
+  0x00000000,
   (outs VGPR_32:$dst),
   (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
   "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
   [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
-                                           (i32 imm:$attr)))]>;
+                                           (i32 imm:$attr)))]
+>;
+
+let OtherPredicates = [has32BankLDS] in {
+
+defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has32BankLDS]
+
+let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst" in {
+
+defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst"
 
 defm V_INTERP_P2_F32 : VINTRP_m <
   0x00000001,