Teach two-address pass to do some coalescing while eliminating REG_SEQUENCE
instructions.

e.g.
%reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
%reg1027<def> = EXTRACT_SUBREG %reg1026, 6
%reg1028<def> = EXTRACT_SUBREG %reg1026<kill>, 5
...
%reg1029<def> = REG_SEQUENCE %reg1028<kill>, 5, %reg1027<kill>, 6, %reg1028, 7, %reg1027, 8, %reg1028, 9, %reg1027, 10, %reg1030<kill>, 11, %reg1032<kill>, 12

After REG_SEQUENCE is eliminated, we are left with:

%reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
%reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
%reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5

The regular coalescer will not be able to coalesce reg1026 and reg1029 because it doesn't
know how to combine sub-register indices 5 and 6. Now the 2-address pass will ask the
target whether sub-registers 5 and 6 of reg1026 can be combined into a larger
sub-register (or combined to be reg1026 itself, as is the case here). If it is possible,
it will be able to replace references of reg1026 with reg1029 + the larger sub-register
index.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103835 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 9b7dc30..9dcdce0 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -351,6 +351,123 @@
   return 0;
 }
 
+bool
+ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                          SmallVectorImpl<unsigned> &SubIndices,
+                                          unsigned &NewSubIdx) const {
+  // NOTE(review): "Size < 6" looks like a typo for a bit width -- confirm.
+  unsigned Size = RC->getSize() * 8;
+  if (Size < 6)
+    return 0;
+  // Default; overridden below when the match is only part of RC.
+  NewSubIdx = 0;  // Whole register.
+  unsigned NumRegs = SubIndices.size();
+  if (NumRegs == 8) {
+    // 8 D registers -> 1 QQQQ register.
+    return (Size == 512 &&
+            SubIndices[0] == ARM::DSUBREG_0 &&
+            SubIndices[1] == ARM::DSUBREG_1 &&
+            SubIndices[2] == ARM::DSUBREG_2 &&
+            SubIndices[3] == ARM::DSUBREG_3 &&
+            SubIndices[4] == ARM::DSUBREG_4 &&
+            SubIndices[5] == ARM::DSUBREG_5 &&
+            SubIndices[6] == ARM::DSUBREG_6 &&
+            SubIndices[7] == ARM::DSUBREG_7);
+  } else if (NumRegs == 4) {
+    if (SubIndices[0] == ARM::QSUBREG_0) {
+      // 4 Q registers -> 1 QQQQ register.
+      return (Size == 512 &&
+              SubIndices[1] == ARM::QSUBREG_1 &&
+              SubIndices[2] == ARM::QSUBREG_2 &&
+              SubIndices[3] == ARM::QSUBREG_3);
+    } else if (SubIndices[0] == ARM::DSUBREG_0) {
+      // 4 D registers -> 1 QQ register.
+      if (Size >= 256 &&
+          SubIndices[1] == ARM::DSUBREG_1 &&
+          SubIndices[2] == ARM::DSUBREG_2 &&
+          SubIndices[3] == ARM::DSUBREG_3) {
+        if (Size == 512)
+          NewSubIdx = ARM::QQSUBREG_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_4) {
+      // 4 D registers -> 1 QQ register (2nd).
+      if (Size == 512 &&
+          SubIndices[1] == ARM::DSUBREG_5 &&
+          SubIndices[2] == ARM::DSUBREG_6 &&
+          SubIndices[3] == ARM::DSUBREG_7) {
+        NewSubIdx = ARM::QQSUBREG_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::SSUBREG_0) {
+      // 4 S registers -> 1 Q register.
+      if (Size >= 128 &&
+          SubIndices[1] == ARM::SSUBREG_1 &&
+          SubIndices[2] == ARM::SSUBREG_2 &&
+          SubIndices[3] == ARM::SSUBREG_3) {
+        if (Size >= 256)
+          NewSubIdx = ARM::QSUBREG_0;
+        return true;
+      }
+    }
+  } else if (NumRegs == 2) {
+    if (SubIndices[0] == ARM::QSUBREG_0) {
+      // 2 Q registers -> 1 QQ register.
+      if (Size >= 256 && SubIndices[1] == ARM::QSUBREG_1) {
+        if (Size == 512)
+          NewSubIdx = ARM::QQSUBREG_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::QSUBREG_2) {
+      // 2 Q registers -> 1 QQ register (2nd).
+      if (Size == 512 && SubIndices[1] == ARM::QSUBREG_3) {
+        NewSubIdx = ARM::QQSUBREG_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_0) {
+      // 2 D registers -> 1 Q register.
+      if (Size >= 128 && SubIndices[1] == ARM::DSUBREG_1) {
+        if (Size >= 256)
+          NewSubIdx = ARM::QSUBREG_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_2) {
+      // 2 D registers -> 1 Q register (2nd).
+      if (Size >= 256 && SubIndices[1] == ARM::DSUBREG_3) {
+        NewSubIdx = ARM::QSUBREG_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_4) {
+      // 2 D registers -> 1 Q register (3rd).
+      if (Size == 512 && SubIndices[1] == ARM::DSUBREG_5) {
+        NewSubIdx = ARM::QSUBREG_2;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::DSUBREG_6) {
+      // 2 D registers -> 1 Q register (4th).
+      if (Size == 512 && SubIndices[1] == ARM::DSUBREG_7) {
+        NewSubIdx = ARM::QSUBREG_3;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::SSUBREG_0) {
+      // 2 S registers -> 1 D register.
+      if (SubIndices[1] == ARM::SSUBREG_1) {
+        if (Size >= 128)
+          NewSubIdx = ARM::DSUBREG_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::SSUBREG_2) {
+      // 2 S registers -> 1 D register (2nd).
+      if (Size >= 128 && SubIndices[1] == ARM::SSUBREG_3) {
+        NewSubIdx = ARM::DSUBREG_1;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
 const TargetRegisterClass *
 ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
   return ARM::GPRRegisterClass;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 456c392..2c9c82d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -81,6 +81,15 @@
   getMatchingSuperRegClass(const TargetRegisterClass *A,
                            const TargetRegisterClass *B, unsigned Idx) const;
 
+  /// canCombinedSubRegIndex - Given a register class and a list of sub-register
+  /// indices, return true if it's possible to combine the sub-register indices
+  /// into one that corresponds to a larger sub-register. Return the new sub-
+  /// register index by reference. Note the new index may be zero if the given
+  /// sub-registers combine to form the whole register.
+  virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                      SmallVectorImpl<unsigned> &SubIndices,
+                                      unsigned &NewSubIdx) const;
+
   const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
 
   std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index efc0cbb..62514c5 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -31,7 +31,8 @@
     SSUBREG_0 = 1,  SSUBREG_1 = 2,  SSUBREG_2 = 3,  SSUBREG_3 = 4,
     DSUBREG_0 = 5,  DSUBREG_1 = 6,  DSUBREG_2 = 7,  DSUBREG_3 = 8,
     DSUBREG_4 = 9,  DSUBREG_5 = 10, DSUBREG_6 = 11, DSUBREG_7 = 12,
-    QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16
+    QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16,
+    QQSUBREG_0= 17, QQSUBREG_1= 18
   };
 }
 
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index ae2b95b..80325ae 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -465,6 +465,10 @@
 def arm_qsubreg_2 : PatLeaf<(i32 15)>;
 def arm_qsubreg_3 : PatLeaf<(i32 16)>;
 
+def arm_qqsubreg_0 : PatLeaf<(i32 17)>;
+def arm_qqsubreg_1 : PatLeaf<(i32 18)>;
+
+
 // S sub-registers of D registers.
 def : SubRegSet<1, [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
                     D8,  D9,  D10, D11, D12, D13, D14, D15],
@@ -552,3 +556,10 @@
                     [Q2,    Q6,    Q10,   Q14]>;
 def : SubRegSet<16, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
                     [Q3,    Q7,    Q11,   Q15]>;
+
+// QQ sub-registers of QQQQ registers.
+def : SubRegSet<17, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
+                    [QQ0,   QQ2,   QQ4,   QQ6]>;
+def : SubRegSet<18, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
+                    [QQ1,   QQ3,   QQ5,   QQ7]>;
+
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 017e6f7..77375e5 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -414,7 +414,9 @@
       return false;
     LastSrcReg = VirtReg;
     const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
-    if (RC != ARM::QPRRegisterClass && RC != ARM::QQPRRegisterClass)
+    if (RC != ARM::QPRRegisterClass &&
+        RC != ARM::QQPRRegisterClass &&
+        RC != ARM::QQQQPRRegisterClass)
       return false;
     unsigned SubIdx = DefMI->getOperand(2).getImm();
     if (LastSubIdx) {
@@ -432,7 +434,7 @@
 
   // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is
   // currently required for correctness. e.g.
-  // 	%reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
+  //  %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
   //  %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
   //  %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
   //  VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,