Start converting NEON load/stores to use pseudo instructions, beginning here
with the VST4 instructions.  Until after register allocation, we want to
represent sets of adjacent registers by a single super-register.  These
VST4 pseudo instructions have a single QQ or QQQQ source register operand.
They get expanded to the real VST4 instructions with 4 separate D register
operands.  Once this conversion is complete, we'll be able to remove the
NEONPreAllocPass and avoid some fragile and hacky code elsewhere.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112108 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index e5f8a63..c71b093 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -24,6 +24,13 @@
 
 namespace {
   class ARMExpandPseudo : public MachineFunctionPass {
+    // Constants for register spacing in NEON load/store instructions.
+    enum NEONRegSpacing {
+      SingleSpc,   // ExpandVST4 stores dsub_0, dsub_1, dsub_2, dsub_3.
+      EvenDblSpc,  // ExpandVST4 stores dsub_0, dsub_2, dsub_4, dsub_6.
+      OddDblSpc    // ExpandVST4 stores dsub_1, dsub_3, dsub_5, dsub_7.
+    };
+
   public:
     static char ID;
     ARMExpandPseudo() : MachineFunctionPass(ID) {}
@@ -41,6 +48,8 @@
     void TransferImpOps(MachineInstr &OldMI,
                         MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
     bool ExpandMBB(MachineBasicBlock &MBB);
+    void ExpandVST4(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+                    bool hasWriteBack, NEONRegSpacing RegSpc);
   };
   char ARMExpandPseudo::ID = 0;
 }
@@ -63,6 +72,61 @@
   }
 }
 
+/// ExpandVST4 - Replace a VST4 pseudo instruction, whose source is one QQ or
+/// QQQQ register, with the real opcode Opc that takes 4 D register operands.
+void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI,
+                                 unsigned Opc, bool hasWriteBack,
+                                 NEONRegSpacing RegSpc) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+  unsigned OpIdx = 0;
+  if (hasWriteBack) {
+    // The write-back register is the leading def operand.
+    const MachineOperand &WBOp = MI.getOperand(OpIdx++);
+    MIB.addReg(WBOp.getReg(),
+               getDefRegState(true) | getDeadRegState(WBOp.isDead()));
+  }
+  // Copy the addrmode6 operands.
+  const MachineOperand &AddrOp = MI.getOperand(OpIdx++);
+  MIB.addReg(AddrOp.getReg(), getKillRegState(AddrOp.isKill()));
+  MIB.addImm(MI.getOperand(OpIdx++).getImm());
+  if (hasWriteBack) {
+    // Copy the am6offset operand.
+    const MachineOperand &OffsetOp = MI.getOperand(OpIdx++);
+    MIB.addReg(OffsetOp.getReg(), getKillRegState(OffsetOp.isKill()));
+  }
+
+  // Pick the D sub-register indices selected by the register spacing.
+  unsigned Sub0, Sub1, Sub2, Sub3;
+  switch (RegSpc) {
+  case SingleSpc:
+    Sub0 = ARM::dsub_0; Sub1 = ARM::dsub_1;
+    Sub2 = ARM::dsub_2; Sub3 = ARM::dsub_3;
+    break;
+  case EvenDblSpc:
+    Sub0 = ARM::dsub_0; Sub1 = ARM::dsub_2;
+    Sub2 = ARM::dsub_4; Sub3 = ARM::dsub_6;
+    break;
+  default:
+    assert(RegSpc == OddDblSpc && "unknown register spacing for VST4");
+    Sub0 = ARM::dsub_1; Sub1 = ARM::dsub_3;
+    Sub2 = ARM::dsub_5; Sub3 = ARM::dsub_7;
+    break;
+  }
+  // Every D register operand inherits the kill flag of the pseudo's source.
+  const MachineOperand &SrcOp = MI.getOperand(OpIdx);
+  const unsigned SrcReg = SrcOp.getReg();
+  const unsigned KillFlag = getKillRegState(SrcOp.isKill());
+  MIB.addReg(TRI->getSubReg(SrcReg, Sub0), KillFlag);
+  MIB.addReg(TRI->getSubReg(SrcReg, Sub1), KillFlag);
+  MIB.addReg(TRI->getSubReg(SrcReg, Sub2), KillFlag);
+  MIB.addReg(TRI->getSubReg(SrcReg, Sub3), KillFlag);
+  MIB = AddDefaultPred(MIB);
+  TransferImpOps(MI, MIB, MIB);
+  MI.eraseFromParent();
+}
+
 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
   bool Modified = false;
 
@@ -71,9 +135,13 @@
     MachineInstr &MI = *MBBI;
     MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
 
+    bool ModifiedOp = true;
     unsigned Opcode = MI.getOpcode();
     switch (Opcode) {
-    default: break;
+    default:
+      ModifiedOp = false;
+      break;
+
     case ARM::tLDRpci_pic: 
     case ARM::t2LDRpci_pic: {
       unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
@@ -92,7 +160,6 @@
         .addOperand(MI.getOperand(2));
       TransferImpOps(MI, MIB1, MIB2);
       MI.eraseFromParent();
-      Modified = true;
       break;
     }
 
@@ -128,7 +195,6 @@
       HI16.addImm(Pred).addReg(PredReg);
       TransferImpOps(MI, LO16, HI16);
       MI.eraseFromParent();
-      Modified = true;
       break;
     }
 
@@ -155,9 +221,37 @@
                      .addReg(OddSrc, getKillRegState(SrcIsKill)));
       TransferImpOps(MI, Even, Odd);
       MI.eraseFromParent();
+      break;  // Do not fall through into the VST4 cases with MI erased.
+    }
+
+    case ARM::VST4d8Pseudo:
+      ExpandVST4(MBBI, ARM::VST4d8, false, SingleSpc); break;
+    case ARM::VST4d16Pseudo:
+      ExpandVST4(MBBI, ARM::VST4d16, false, SingleSpc); break;
+    case ARM::VST4d32Pseudo:
+      ExpandVST4(MBBI, ARM::VST4d32, false, SingleSpc); break;
+    case ARM::VST4d8Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4d8_UPD, true, SingleSpc); break;
+    case ARM::VST4d16Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4d16_UPD, true, SingleSpc); break;
+    case ARM::VST4d32Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4d32_UPD, true, SingleSpc); break;
+    case ARM::VST4q8Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc); break;
+    case ARM::VST4q16Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc); break;
+    case ARM::VST4q32Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc); break;
+    case ARM::VST4q8oddPseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q8_UPD, true, OddDblSpc); break;
+    case ARM::VST4q16oddPseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q16_UPD, true, OddDblSpc); break;
+    case ARM::VST4q32oddPseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q32_UPD, true, OddDblSpc); break;
+    }
+
+    if (ModifiedOp)
       Modified = true;
-    }
-    }
     MBBI = NMBBI;
   }