diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 943952f..cea4939 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -738,7 +738,7 @@
             RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
     // FIXME: Neon instructions should support predicates
     if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
                      .addFrameIndex(FI).addImm(128)
                      .addMemOperand(MMO)
                      .addReg(SrcReg, getKillRegState(isKill)));
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 332ca3c..d0d940a 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -132,9 +132,9 @@
                     unsigned *QOpcodes0, unsigned *QOpcodes1);
 
   /// SelectVST - Select NEON store intrinsics.  NumVecs should
-  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
+  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
   /// stores of D registers and even subregs and odd subregs of Q registers.
-  /// For NumVecs == 2, QOpcodes1 is not used.
+  /// For NumVecs <= 2, QOpcodes1 is not used.
   SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
                     unsigned *QOpcodes0, unsigned *QOpcodes1);
 
@@ -1048,7 +1048,7 @@
   case MVT::v4f32:
   case MVT::v4i32: OpcodeIndex = 2; break;
   case MVT::v2i64: OpcodeIndex = 3;
-    assert(NumVecs == 1 && "v2i64 type only supported for VLD1/VST1");
+    assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
     break;
   }
 
@@ -1112,7 +1112,7 @@
 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
                                    unsigned *DOpcodes, unsigned *QOpcodes0,
                                    unsigned *QOpcodes1) {
-  assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
+  assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range");
   DebugLoc dl = N->getDebugLoc();
 
   SDValue MemAddr, Align;
@@ -1137,6 +1137,9 @@
   case MVT::v8i16: OpcodeIndex = 1; break;
   case MVT::v4f32:
   case MVT::v4i32: OpcodeIndex = 2; break;
+  case MVT::v2i64: OpcodeIndex = 3;
+    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+    break;
   }
 
   SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
@@ -1157,9 +1160,9 @@
   }
 
   EVT RegVT = GetNEONSubregVT(VT);
-  if (NumVecs == 2) {
-    // Quad registers are directly supported for VST2,
-    // storing 2 pairs of D regs.
+  if (NumVecs <= 2) {
+    // Quad registers are directly supported for VST1 and VST2,
+    // storing pairs of D regs.
     unsigned Opc = QOpcodes0[OpcodeIndex];
     for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
       Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
@@ -1170,7 +1173,8 @@
     Ops.push_back(Pred);
     Ops.push_back(Reg0); // predicate register
     Ops.push_back(Chain);
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+                                  5 + 2 * NumVecs);
   }
 
   // Otherwise, quad registers are stored with two separate instructions,
@@ -1894,9 +1898,17 @@
       return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
+    case Intrinsic::arm_neon_vst1: {
+      unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+                              ARM::VST1d32, ARM::VST1d64 };
+      unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+                              ARM::VST1q32, ARM::VST1q64 };
+      return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
+    }
+
     case Intrinsic::arm_neon_vst2: {
       unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
-                              ARM::VST2d32, ARM::VST2d64 };
+                              ARM::VST2d32, ARM::VST1q64 };
       unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
       return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
     }
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 9156ff9..6d20e29 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -141,6 +141,7 @@
 } // mayLoad = 1
 
 // Use vstmia to store a Q register as a D register pair.
+// This is equivalent to VSTMD except that it has a Q register operand.
 def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
                 "vstmia", "$addr, ${src:dregpair}",
                 [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
@@ -151,6 +152,20 @@
   let Inst{11-8}  = 0b1011;
 }
 
+let mayStore = 1 in {
+// Use vst1 to store a Q register as a D register pair.
+// This alternative to VSTRQ allows an alignment to be specified.
+// This is equivalent to VST1q64 except that it has a Q register operand.
+def VST1q
+  : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
+          IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
+def VST1q_UPD
+  : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
+          (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
+          IIC_VST, "vst1", "64", "${src:dregpair}, $addr$offset",
+          "$addr.addr = $wb", []>;
+} // mayStore = 1
+
 let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
 
 //   VLD1     : Vector Load (multiple single elements)
@@ -477,32 +492,27 @@
 //   FIXME: Not yet implemented.
 } // mayLoad = 1, hasExtraDefRegAllocReq = 1
 
-//   VST1     : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string Dt, ValueType Ty>
-  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
-          "vst1", Dt, "\\{$src\\}, $addr", "",
-          [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>;
-class VST1Q<bits<4> op7_4, string Dt, ValueType Ty>
-  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
-          "vst1", Dt, "${src:dregpair}, $addr", "",
-          [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>;
-
-let hasExtraSrcRegAllocReq = 1 in {
-def  VST1d8   : VST1D<0b0000, "8",  v8i8>;
-def  VST1d16  : VST1D<0b0100, "16", v4i16>;
-def  VST1d32  : VST1D<0b1000, "32", v2i32>;
-def  VST1df   : VST1D<0b1000, "32", v2f32>;
-def  VST1d64  : VST1D<0b1100, "64", v1i64>;
-
-def  VST1q8   : VST1Q<0b0000, "8",  v16i8>;
-def  VST1q16  : VST1Q<0b0100, "16", v8i16>;
-def  VST1q32  : VST1Q<0b1000, "32", v4i32>;
-def  VST1qf   : VST1Q<0b1000, "32", v4f32>;
-def  VST1q64  : VST1Q<0b1100, "64", v2i64>;
-} // hasExtraSrcRegAllocReq
-
 let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
 
+//   VST1     : Vector Store (multiple single elements)
+class VST1D<bits<4> op7_4, string Dt>
+  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
+          "vst1", Dt, "\\{$src\\}, $addr", "", []>;
+class VST1Q<bits<4> op7_4, string Dt>
+  : NLdSt<0,0b00,0b1010,op7_4, (outs),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
+          "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+
+def  VST1d8   : VST1D<0b0000, "8">;
+def  VST1d16  : VST1D<0b0100, "16">;
+def  VST1d32  : VST1D<0b1000, "32">;
+def  VST1d64  : VST1D<0b1100, "64">;
+
+def  VST1q8   : VST1Q<0b0000, "8">;
+def  VST1q16  : VST1Q<0b0100, "16">;
+def  VST1q32  : VST1Q<0b1000, "32">;
+def  VST1q64  : VST1Q<0b1100, "64">;
+
 // ...with address register writeback:
 class VST1DWB<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
@@ -582,9 +592,6 @@
 def  VST2d8   : VST2D<0b1000, 0b0000, "8">;
 def  VST2d16  : VST2D<0b1000, 0b0100, "16">;
 def  VST2d32  : VST2D<0b1000, 0b1000, "32">;
-def  VST2d64  : NLdSt<0,0b00,0b1010,0b1100, (outs),
-                      (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
-                      "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>;
 
 def  VST2q8   : VST2Q<0b0000, "8">;
 def  VST2q16  : VST2Q<0b0100, "16">;
@@ -606,11 +613,6 @@
 def VST2d8_UPD  : VST2DWB<0b1000, 0b0000, "8">;
 def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
 def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
-def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
-                        (ins addrmode6:$addr, am6offset:$offset,
-                         DPR:$src1, DPR:$src2), IIC_VST,
-                        "vst1", "64", "\\{$src1, $src2\\}, $addr$offset",
-                        "$addr.addr = $wb", []>;
 
 def VST2q8_UPD  : VST2QWB<0b0000, "8">;
 def VST2q16_UPD : VST2QWB<0b0100, "16">;
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index ce620fa..7334259 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -50,10 +50,6 @@
   case ARM::VLD1q16:
   case ARM::VLD1q32:
   case ARM::VLD1q64:
-    FirstOpnd = 0;
-    NumRegs = 2;
-    return true;
-
   case ARM::VLD2d8:
   case ARM::VLD2d16:
   case ARM::VLD2d32:
@@ -177,10 +173,13 @@
     Stride = 2;
     return true;
 
+  case ARM::VST1q8:
+  case ARM::VST1q16:
+  case ARM::VST1q32:
+  case ARM::VST1q64:
   case ARM::VST2d8:
   case ARM::VST2d16:
   case ARM::VST2d32:
-  case ARM::VST2d64:
   case ARM::VST2LNd8:
   case ARM::VST2LNd16:
   case ARM::VST2LNd32:
