[AMDGPU] Fix dwordx3/southern-islands failures.

This commit fixes the dwordx3 failures on Southern Islands that were
reported in Bugzilla (https://bugs.llvm.org/show_bug.cgi?id=40129) by
no longer generating the dwordx3 variants of load/store instructions on
subtargets that lack them; those variants were only added to the ISA
after Southern Islands.

Differential Revision: https://reviews.llvm.org/D56434

llvm-svn: 350838
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 52bbe5c..be291b1 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -160,7 +160,7 @@
   bool OptimizeAgain;
 
   static bool offsetsCanBeCombined(CombineInfo &CI);
-  static bool widthsFit(const CombineInfo &CI);
+  static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI);
   static unsigned getNewOpcode(const CombineInfo &CI);
   static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI);
   const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI);
@@ -367,11 +367,12 @@
   return false;
 }
 
-bool SILoadStoreOptimizer::widthsFit(const CombineInfo &CI) {
+bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
+                                     const CombineInfo &CI) {
   const unsigned Width = (CI.Width0 + CI.Width1);
   switch (CI.InstClass) {
   default:
-    return Width <= 4;
+    return (Width <= 4) && (STM.hasDwordx3LoadStores() || (Width != 3));
   case S_BUFFER_LOAD_IMM:
     switch (Width) {
     default:
@@ -645,7 +646,7 @@
       // We also need to go through the list of instructions that we plan to
       // move and make sure they are all safe to move down past the merged
       // instruction.
-      if (widthsFit(CI) && offsetsCanBeCombined(CI))
+      if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
         if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
           return true;
     }