Start converting NEON load/stores to use pseudo instructions, beginning here
with the VST4 instructions.  Until after register allocation, we want to
represent sets of adjacent registers by a single super-register.  These
VST4 pseudo instructions have a single QQ or QQQQ source register operand.
They get expanded to the real VST4 instructions with 4 separate D register
operands.  Once this conversion is complete, we'll be able to remove the
NEONPreAllocPass and avoid some fragile and hacky code elsewhere.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112108 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 89c32ea..86f64bc 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1260,6 +1260,11 @@
   Ops.push_back(MemAddr);
   Ops.push_back(Align);
 
+  // FIXME: Temporary flag marking the VSTs already converted to pseudo
+  // instructions (currently VST4, except v1i64); remove once all are done.
+  bool usePseudoInstrs = (NumVecs == 4 &&
+                          VT.getSimpleVT().SimpleTy != MVT::v1i64);
+
   if (is64BitVector) {
     if (NumVecs >= 2) {
       SDValue RegSeq;
@@ -1278,6 +1283,9 @@
           : N->getOperand(3+3);
         RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
       }
+      if (usePseudoInstrs)
+        Ops.push_back(RegSeq);
+      else {
 
       // Now extract the D registers back out.
       Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
@@ -1290,15 +1298,16 @@
       if (NumVecs > 3)
         Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
                                                      RegSeq));
+      }
     } else {
-      for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-        Ops.push_back(N->getOperand(Vec+3));
+      Ops.push_back(N->getOperand(3));
     }
     Ops.push_back(Pred);
     Ops.push_back(Reg0); // predicate register
     Ops.push_back(Chain);
     unsigned Opc = DOpcodes[OpcodeIndex];
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+                                  usePseudoInstrs ? 6 : NumVecs+5);
   }
 
   EVT RegVT = GetNEONSubregVT(VT);
@@ -1363,6 +1372,9 @@
   // Store the even D registers.
   assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
   Ops.push_back(Reg0); // post-access address offset
+  if (usePseudoInstrs)
+    Ops.push_back(RegSeq);
+  else
   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
     Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
                                                  RegVT, RegSeq));
@@ -1371,18 +1383,24 @@
   Ops.push_back(Chain);
   unsigned Opc = QOpcodes0[OpcodeIndex];
   SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+6);
+                                        MVT::Other, Ops.data(),
+                                        usePseudoInstrs ? 7 : NumVecs+6);
   Chain = SDValue(VStA, 1);
 
   // Store the odd D registers.
   Ops[0] = SDValue(VStA, 0); // MemAddr
+  if (usePseudoInstrs)
+    Ops[6] = Chain;
+  else {
   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
     Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
                                                 RegVT, RegSeq);
   Ops[NumVecs+5] = Chain;
+  }
   Opc = QOpcodes1[OpcodeIndex];
   SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+6);
+                                        MVT::Other, Ops.data(),
+                                        usePseudoInstrs ? 7 : NumVecs+6);
   Chain = SDValue(VStB, 1);
   ReplaceUses(SDValue(N, 0), Chain);
   return NULL;
@@ -2312,14 +2330,14 @@
     }
 
     case Intrinsic::arm_neon_vst4: {
-      unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
-                              ARM::VST4d32, ARM::VST1d64Q };
-      unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
-                               ARM::VST4q16_UPD,
-                               ARM::VST4q32_UPD };
-      unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
-                               ARM::VST4q16odd_UPD,
-                               ARM::VST4q32odd_UPD };
+      unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+                              ARM::VST4d32Pseudo, ARM::VST1d64Q };
+      unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+                               ARM::VST4q16Pseudo_UPD,
+                               ARM::VST4q32Pseudo_UPD };
+      unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+                               ARM::VST4q16oddPseudo_UPD,
+                               ARM::VST4q32oddPseudo_UPD };
       return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }