diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index c655d36..1a08f4d 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -49,25 +49,28 @@
 
       isVoid         =  13,   // This has no value
 
-      v8i8           =  14,   //  8 x i8
-      v4i16          =  15,   //  4 x i16
-      v2i32          =  16,   //  2 x i32
-      v1i64          =  17,   //  1 x i64
-      v16i8          =  18,   // 16 x i8
-      v8i16          =  19,   //  8 x i16
-      v3i32          =  20,   //  3 x i32
-      v4i32          =  21,   //  4 x i32
-      v2i64          =  22,   //  2 x i64
+      v2i8           =  14,   //  2 x i8
+      v4i8           =  15,   //  4 x i8
+      v2i16          =  16,   //  2 x i16
+      v8i8           =  17,   //  8 x i8
+      v4i16          =  18,   //  4 x i16
+      v2i32          =  19,   //  2 x i32
+      v1i64          =  20,   //  1 x i64
+      v16i8          =  21,   // 16 x i8
+      v8i16          =  22,   //  8 x i16
+      v3i32          =  23,   //  3 x i32
+      v4i32          =  24,   //  4 x i32
+      v2i64          =  25,   //  2 x i64
 
-      v2f32          =  23,   //  2 x f32
-      v3f32          =  24,   //  3 x f32
-      v4f32          =  25,   //  4 x f32
-      v2f64          =  26,   //  2 x f64
+      v2f32          =  26,   //  2 x f32
+      v3f32          =  27,   //  3 x f32
+      v4f32          =  28,   //  4 x f32
+      v2f64          =  29,   //  2 x f64
 
-      FIRST_VECTOR_VALUETYPE = v8i8,
+      FIRST_VECTOR_VALUETYPE = v2i8,
       LAST_VECTOR_VALUETYPE  = v2f64,
 
-      LAST_VALUETYPE =  27,   // This always remains at the end of the list.
+      LAST_VALUETYPE =  30,   // This always remains at the end of the list.
 
       // iPTRAny - An int value the size of the pointer of the current
       // target to any address space. This must only be used internal to
@@ -166,10 +169,13 @@
       default:
         break;
       case i8:
+        if (NumElements == 2)  return v2i8;
+        if (NumElements == 4)  return v4i8;
         if (NumElements == 8)  return v8i8;
         if (NumElements == 16) return v16i8;
         break;
       case i16:
+        if (NumElements == 2)  return v2i16;
         if (NumElements == 4)  return v4i16;
         if (NumElements == 8)  return v8i16;
         break;
@@ -233,7 +239,7 @@
       return isSimple() ?
              ((SimpleTy >= FIRST_INTEGER_VALUETYPE &&
                SimpleTy <= LAST_INTEGER_VALUETYPE) ||
-              (SimpleTy >= v8i8 && SimpleTy <= v2i64)) :
+              (SimpleTy >= v2i8 && SimpleTy <= v2i64)) :
              isExtendedInteger();
     }
 
@@ -312,8 +318,11 @@
       switch (V) {
       default:
         return getExtendedVectorElementType();
+      case v2i8 :
+      case v4i8 :
       case v8i8 :
       case v16i8: return i8;
+      case v2i16:
       case v4i16:
       case v8i16: return i16;
       case v2i32:
@@ -338,11 +347,14 @@
       case v16i8: return 16;
       case v8i8 :
       case v8i16: return 8;
+      case v4i8:
       case v4i16:
       case v4i32:
       case v4f32: return 4;
       case v3i32:
       case v3f32: return 3;
+      case v2i8:
+      case v2i16:
       case v2i32:
       case v2i64:
       case v2f32:
@@ -364,9 +376,12 @@
         return getExtendedSizeInBits();
       case i1  :  return 1;
       case i8  :  return 8;
-      case i16 :  return 16;
+      case i16 :
+      case v2i8:  return 16;
       case f32 :
-      case i32 :  return 32;
+      case i32 :
+      case v4i8:
+      case v2i16: return 32;
       case f64 :
       case i64 :
       case v8i8:
@@ -407,6 +422,25 @@
         return getIntegerVT(1 << Log2_32_Ceil(BitWidth));
     }
 
+    /// isPow2VectorType - Returns true if the vector length is a power of 2.
+    bool isPow2VectorType() const {
+      unsigned NElts = getVectorNumElements();
+      return !(NElts & (NElts - 1));  // No bit left after clearing the lowest.
+    }
+
+    /// getPow2VectorType - Returns this vector type with its element count
+    /// rounded up to the nearest power of 2.
+    MVT getPow2VectorType() const {
+      // A vector whose length is already a power of 2 is returned unchanged.
+      if (isPow2VectorType())
+        return *this;
+      // Otherwise round the element count up to the next power of 2 and
+      // build the vector type with the same element type.
+      unsigned NElts = getVectorNumElements();
+      unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
+      return MVT::getVectorVT(getVectorElementType(), Pow2NElts);
+    }
+
     /// getIntegerVTBitMask - Return an integer with 1's every place there are
     /// bits in the specified integer value type. FIXME: Should return an apint.
     uint64_t getIntegerVTBitMask() const {
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 844b8db..53ed0be 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -33,21 +33,24 @@
 def ppcf128: ValueType<128, 11>;   // PPC 128-bit floating point value
 def FlagVT : ValueType<0  , 12>;   // Condition code or machine flag
 def isVoid : ValueType<0  , 13>;   // Produces no value
-def v8i8   : ValueType<64 , 14>;   //  8 x i8  vector value
-def v4i16  : ValueType<64 , 15>;   //  4 x i16 vector value
-def v2i32  : ValueType<64 , 16>;   //  2 x i32 vector value
-def v1i64  : ValueType<64 , 17>;   //  1 x i64 vector value
+def v2i8   : ValueType<16 , 14>;   //  2 x i8  vector value
+def v4i8   : ValueType<32 , 15>;   //  4 x i8  vector value
+def v2i16  : ValueType<32 , 16>;   //  2 x i16 vector value
+def v8i8   : ValueType<64 , 17>;   //  8 x i8  vector value
+def v4i16  : ValueType<64 , 18>;   //  4 x i16 vector value
+def v2i32  : ValueType<64 , 19>;   //  2 x i32 vector value
+def v1i64  : ValueType<64 , 20>;   //  1 x i64 vector value
 
-def v16i8  : ValueType<128, 18>;   // 16 x i8  vector value
-def v8i16  : ValueType<128, 19>;   //  8 x i16 vector value
-def v3i32  : ValueType<96 , 20>;   //  3 x i32 vector value
-def v4i32  : ValueType<128, 21>;   //  4 x i32 vector value
-def v2i64  : ValueType<128, 22>;   //  2 x i64 vector value
+def v16i8  : ValueType<128, 21>;   // 16 x i8  vector value
+def v8i16  : ValueType<128, 22>;   //  8 x i16 vector value
+def v3i32  : ValueType<96 , 23>;   //  3 x i32 vector value
+def v4i32  : ValueType<128, 24>;   //  4 x i32 vector value
+def v2i64  : ValueType<128, 25>;   //  2 x i64 vector value
 
-def v2f32  : ValueType<64,  23>;   //  2 x f32 vector value
-def v3f32  : ValueType<96 , 24>;   //  3 x f32 vector value
-def v4f32  : ValueType<128, 25>;   //  4 x f32 vector value
-def v2f64  : ValueType<128, 26>;   //  2 x f64 vector value
+def v2f32  : ValueType<64,  26>;   //  2 x f32 vector value
+def v3f32  : ValueType<96 , 27>;   //  3 x f32 vector value
+def v4f32  : ValueType<128, 28>;   //  4 x f32 vector value
+def v2f64  : ValueType<128, 29>;   //  2 x f64 vector value
 
 // Pseudo valuetype mapped to the current pointer size to any address space.
 // Should only be used in TableGen.
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 71a1d89..2c65588 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -168,8 +168,7 @@
     LegalizeAction getTypeAction(MVT VT) const {
       if (VT.isExtended()) {
         if (VT.isVector()) {
-          // First try vector widening
-          return Promote;
+          return VT.isPow2VectorType() ? Expand : Promote;
         }
         if (VT.isInteger())
           // First promote to a power-of-two size, then expand if necessary.
@@ -216,9 +215,15 @@
     }
 
     if (VT.isVector()) {
-      unsigned NumElts = VT.getVectorNumElements();
-      MVT EltVT = VT.getVectorElementType();
-      return (NumElts == 1) ? EltVT : MVT::getVectorVT(EltVT, NumElts / 2);
+      MVT NVT = VT.getPow2VectorType();
+      if (NVT == VT) {
+        // Vector length is a power of 2 - split to half the size.
+        unsigned NumElts = VT.getVectorNumElements();
+        MVT EltVT = VT.getVectorElementType();
+        return (NumElts == 1) ? EltVT : MVT::getVectorVT(EltVT, NumElts / 2);
+      }
+      // Promote to a power of two size, avoiding multi-step promotion.
+      return getTypeAction(NVT) == Promote ? getTypeToTransformTo(NVT) : NVT;
     } else if (VT.isInteger()) {
       MVT NVT = VT.getRoundIntegerType();
       if (NVT == VT)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 18ba912..4a2af06 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -491,9 +491,11 @@
   MVT VT = Op.getValueType();
   // If the type legalizer was run then we should never see any illegal result
   // types here except for target constants (the type legalizer does not touch
-  // those).
+  // those) or for build vector used as a mask for a vector shuffle.
+  // FIXME: We can remove the BUILD_VECTOR case when we fix PR2957.
   assert((TypesNeedLegalizing || getTypeAction(VT) == Legal ||
-          Op.getOpcode() == ISD::TargetConstant) &&
+          Op.getOpcode() == ISD::TargetConstant ||
+          Op.getOpcode() == ISD::BUILD_VECTOR) &&
          "Illegal type introduced after type legalization?");
   switch (getTypeAction(VT)) {
   default: assert(0 && "Bad type action!");
@@ -3299,7 +3301,7 @@
                          0);
         break;
       }
-      
+
       // Check to see if we have a libcall for this operator.
       RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
       bool isSigned = false;
@@ -7646,7 +7648,10 @@
   case ISD::XOR:
   case ISD::UREM:
   case ISD::SREM:
-  case ISD::FREM: {
+  case ISD::FREM:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL: {
     SDValue LL, LH, RL, RH;
     SplitVectorOp(Node->getOperand(0), LL, LH);
     SplitVectorOp(Node->getOperand(1), RL, RH);
@@ -8067,30 +8072,33 @@
     SDValue Tmp1 = Node->getOperand(0);
     // Converts between two different types so we need to determine
     // the correct widen type for the input operand.
-    MVT TVT = Tmp1.getValueType();
-    assert(TVT.isVector() && "can not widen non vector type");
-    MVT TEVT = TVT.getVectorElementType();
-    assert(WidenVT.getSizeInBits() % EVT.getSizeInBits() == 0 &&
-         "can not widen bit bit convert that are not multiple of element type");
-    MVT TWidenVT =  MVT::getVectorVT(TEVT,
-                                   WidenVT.getSizeInBits()/EVT.getSizeInBits());
-    Tmp1 = WidenVectorOp(Tmp1, TWidenVT);
-    assert(Tmp1.getValueType().getSizeInBits() == WidenVT.getSizeInBits());
-    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1);
+    MVT InVT = Tmp1.getValueType();
+    unsigned WidenSize = WidenVT.getSizeInBits();    
+    if (InVT.isVector()) {
+      MVT InEltVT = InVT.getVectorElementType();
+      unsigned InEltSize = InEltVT.getSizeInBits();
+      assert(WidenSize % InEltSize == 0 &&
+             "can not widen bit convert that are not multiple of element type");
+      MVT NewInWidenVT = MVT::getVectorVT(InEltVT, WidenSize / InEltSize);
+      Tmp1 = WidenVectorOp(Tmp1, NewInWidenVT);
+      assert(Tmp1.getValueType().getSizeInBits() == WidenVT.getSizeInBits());
+      Result = DAG.getNode(ISD::BIT_CONVERT, WidenVT, Tmp1);
+    } else {
+      // If the result size is a multiple of the input size, widen the input
+      // and then convert.
+      unsigned InSize = InVT.getSizeInBits();
+      assert(WidenSize % InSize == 0 &&
+             "can not widen bit convert that are not multiple of element type");
+      unsigned NewNumElts = WidenSize / InSize;
+      SmallVector<SDValue, 16> Ops(NewNumElts);
+      SDValue UndefVal = DAG.getNode(ISD::UNDEF, InVT);
+      Ops[0] = Tmp1;
+      for (unsigned i = 1; i < NewNumElts; ++i)
+        Ops[i] = UndefVal;
 
-    TargetLowering::LegalizeAction action =
-      TLI.getOperationAction(Node->getOpcode(), WidenVT);
-    switch (action)  {
-    default: assert(0 && "action not supported");
-    case TargetLowering::Legal:
-        break;
-    case TargetLowering::Promote:
-        // We defer the promotion to when we legalize the op
-      break;
-    case TargetLowering::Expand:
-      // Expand the operation into a bunch of nasty scalar code.
-      Result = LegalizeOp(UnrollVectorOp(Result));
-      break;
+      MVT NewInVT = MVT::getVectorVT(InVT, NewNumElts);
+      Result = DAG.getNode(ISD::BUILD_VECTOR, NewInVT, &Ops[0], NewNumElts);
+      Result = DAG.getNode(ISD::BIT_CONVERT, WidenVT, Result);
     }
     break;
   }
@@ -8098,7 +8106,8 @@
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
   case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT: {
+  case ISD::FP_TO_UINT:
+  case ISD::FP_ROUND: {
     SDValue Tmp1 = Node->getOperand(0);
     // Converts between two different types so we need to determine
     // the correct widen type for the input operand.
@@ -8118,7 +8127,6 @@
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND:
-  case ISD::FP_ROUND:
   case ISD::SIGN_EXTEND_INREG:
   case ISD::FABS:
   case ISD::FNEG:
@@ -8129,7 +8137,7 @@
   case ISD::CTTZ:
   case ISD::CTLZ: {
     // Unary op widening
-    SDValue Tmp1;    
+    SDValue Tmp1;
     Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
     assert(Tmp1.getValueType() == WidenVT);
     Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1);
@@ -8306,7 +8314,7 @@
     MVT TmpWidenVT =  MVT::getVectorVT(TmpEVT, NewNumElts);
     Tmp1 = WidenVectorOp(Tmp1, TmpWidenVT);
     SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), TmpWidenVT);
-    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1, Tmp2, 
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1, Tmp2,
                          Node->getOperand(2));
     break;
   }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8abf144..5f23677 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -217,7 +217,7 @@
     // Convert the element to an integer and promote it by hand.
     return DAG.getNode(ISD::ANY_EXTEND, NOutVT,
                        BitConvertToInteger(GetScalarizedVector(InOp)));
-  case SplitVector:
+  case SplitVector: {
     // For example, i32 = BIT_CONVERT v2i16 on alpha.  Convert the split
     // pieces of the input into integers and reassemble in the final type.
     SDValue Lo, Hi;
@@ -233,9 +233,13 @@
                        JoinIntegers(Lo, Hi));
     return DAG.getNode(ISD::BIT_CONVERT, NOutVT, InOp);
   }
+  case WidenVector:
+    if (OutVT.bitsEq(NInVT))
+      // The input is widened to the same size.  Convert to the widened value.
+      return DAG.getNode(ISD::BIT_CONVERT, OutVT, GetWidenedVector(InOp));
+  }
 
   // Otherwise, lower the bit-convert to a store/load from the stack.
-
   // Create the stack frame object.  Make sure it is aligned for both
   // the source and destination types.
   SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 8454dda..704c537 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -108,6 +108,8 @@
         Mapped |= 32;
       if (SplitVectors.find(Res) != SplitVectors.end())
         Mapped |= 64;
+      if (WidenedVectors.find(Res) != WidenedVectors.end())
+        Mapped |= 128;
 
       if (I->getNodeId() != Processed) {
         if (Mapped != 0) {
@@ -115,7 +117,10 @@
           Failed = true;
         }
       } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
-        if (Mapped > 1) {
+        // FIXME: Because of PR2957, the build vector can be placed on this
+        // list but if the associated vector shuffle is split, the build vector
+        // can also be split so we allow this to go through for now.
+        if (Mapped > 1 && Res.getOpcode() != ISD::BUILD_VECTOR) {
           cerr << "Value with legal type was transformed!";
           Failed = true;
         }
@@ -144,6 +149,8 @@
           cerr << " ExpandedFloats";
         if (Mapped & 64)
           cerr << " SplitVectors";
+        if (Mapped & 128)
+          cerr << " WidenedVectors";
         cerr << "\n";
         abort();
       }
@@ -241,6 +248,10 @@
         SplitVectorResult(N, i);
         Changed = true;
         goto NodeDone;
+      case WidenVector:
+        WidenVectorResult(N, i);
+        Changed = true;
+        goto NodeDone;
       }
     }
 
@@ -255,6 +266,13 @@
       if (IgnoreNodeResults(N->getOperand(i).getNode()))
         continue;
 
+      if (N->getOpcode() == ISD::VECTOR_SHUFFLE && i == 2) {
+        // The shuffle mask doesn't need to be a legal vector type.
+        // FIXME: We can remove this once we fix PR2957.
+        SetIgnoredNodeResult(N->getOperand(2).getNode());
+        continue;
+      }
+
       MVT OpVT = N->getOperand(i).getValueType();
       switch (getTypeAction(OpVT)) {
       default:
@@ -288,6 +306,10 @@
         NeedsReanalyzing = SplitVectorOperand(N, i);
         Changed = true;
         break;
+      case WidenVector:
+        NeedsReanalyzing = WidenVectorOperand(N, i);
+        Changed = true;
+        break;
       }
       break;
     }
@@ -791,6 +813,18 @@
   Entry.second = Hi;
 }
 
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+  AnalyzeNewValue(Result);
+  // Record Result as the widened form of Op; each value is mapped only once.
+  SDValue &OpEntry = WidenedVectors[Op];
+  assert(OpEntry.getNode() == 0 && "Node is already widened!");
+  OpEntry = Result;
+}
+
+// SetIgnoredNodeResult - Add N to the set consulted by IgnoreNodeResults.
+void DAGTypeLegalizer::SetIgnoredNodeResult(SDNode* N) {
+  IgnoredNodesResultsSet.insert(N);
+}
 
 //===----------------------------------------------------------------------===//
 // Utilities.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9a42c56..2d4cde8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 
@@ -63,7 +64,8 @@
     SoftenFloat,     // Convert this float type to a same size integer type.
     ExpandFloat,     // Split this float type into two of half the size.
     ScalarizeVector, // Replace this one-element vector with its element type.
-    SplitVector      // This vector type should be split into smaller vectors.
+    SplitVector,     // This vector type should be split into smaller vectors.
+    WidenVector      // This vector type should be widened into larger vectors.
   };
 
   /// ValueTypeActions - This is a bitvector that contains two bits for each
@@ -88,11 +90,8 @@
       //   2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
       if (!VT.isVector())
         return PromoteInteger;
-      else if (VT.getVectorNumElements() == 1)
-        return ScalarizeVector;
       else
-        // TODO: move widen code to LegalizeTypes.
-        return SplitVector;
+        return WidenVector;
     case TargetLowering::Expand:
       // Expand can mean
       // 1) split scalar in half, 2) convert a float to an integer,
@@ -120,9 +119,13 @@
 
   /// IgnoreNodeResults - Pretend all of this node's results are legal.
   bool IgnoreNodeResults(SDNode *N) const {
-    return N->getOpcode() == ISD::TargetConstant;
+    return N->getOpcode() == ISD::TargetConstant ||
+           IgnoredNodesResultsSet.count(N);
   }
 
+  /// IgnoredNodesResultsSet - Nodes whose results don't need to be legal.
+  DenseSet<SDNode*> IgnoredNodesResultsSet;
+
   /// PromotedIntegers - For integer nodes that are below legal width, this map
   /// indicates what promoted value to use.
   DenseMap<SDValue, SDValue> PromotedIntegers;
@@ -147,6 +150,10 @@
   /// which operands are the expanded version of the input.
   DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
 
+  /// WidenedVectors - For vector nodes that need to be widened, indicates
+  /// the widened value to use.
+  DenseMap<SDValue, SDValue> WidenedVectors;
+
   /// ReplacedValues - For values that have been replaced with another,
   /// indicates the replacement value to use.
   DenseMap<SDValue, SDValue> ReplacedValues;
@@ -201,6 +208,8 @@
 
   SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index);
 
+  void SetIgnoredNodeResult(SDNode* N);
+
   //===--------------------------------------------------------------------===//
   // Integer Promotion Support: LegalizeIntegerTypes.cpp
   //===--------------------------------------------------------------------===//
@@ -563,6 +572,91 @@
   SDValue SplitVecOp_VECTOR_SHUFFLE(SDNode *N, unsigned OpNo);
 
   //===--------------------------------------------------------------------===//
+  // Vector Widening Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+  SDValue GetWidenedVector(SDValue Op) {  // Returns the widened form of Op.
+    SDValue &WidenedOp = WidenedVectors[Op];
+    RemapValue(WidenedOp);  // NOTE(review): presumably updates a stale entry.
+    assert(WidenedOp.getNode() && "Operand wasn't widened?");
+    return WidenedOp;
+  }
+  void SetWidenedVector(SDValue Op, SDValue Result);
+
+  // Widen Vector Result Promotion.
+  void WidenVectorResult(SDNode *N, unsigned ResNo);
+  SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+  SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+  SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+  SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+  SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+  SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_LOAD(SDNode* N);
+  SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+  SDValue WidenVecRes_SELECT(SDNode* N);
+  SDValue WidenVecRes_SELECT_CC(SDNode* N);
+  SDValue WidenVecRes_UNDEF(SDNode *N);
+  SDValue WidenVecRes_VECTOR_SHUFFLE(SDNode *N);
+  SDValue WidenVecRes_VSETCC(SDNode* N);
+
+  SDValue WidenVecRes_Binary(SDNode *N);
+  SDValue WidenVecRes_Convert(SDNode *N);
+  SDValue WidenVecRes_Shift(SDNode *N);
+  SDValue WidenVecRes_Unary(SDNode *N);
+
+  // Widen Vector Operand.
+  bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+  SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue WidenVecOp_STORE(SDNode* N);
+
+  SDValue WidenVecOp_Convert(SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GenWidenVectorLoads - Helper function to generate a set of loads to
+  /// load a vector with a resulting wider type.
+  /// It takes
+  ///   LdChain:    list of chains for the loads we have generated.
+  ///   Chain:      incoming chain for the ld vector.
+  ///   BasePtr:    base pointer to load from.
+  ///   SV:         memory disambiguation source value.
+  ///   SVOffset:   memory disambiguation offset.
+  ///   Alignment:  alignment of the memory.
+  ///   isVolatile: volatile load.
+  ///   LdWidth:    width of memory that we want to load.
+  ///   ResType:    the wider result type for the resulting vector.
+  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
+                              SDValue BasePtr, const Value *SV,
+                              int SVOffset, unsigned Alignment,
+                              bool isVolatile, unsigned LdWidth,
+                              MVT ResType);
+
+  /// GenWidenVectorStores - Helper function to generate a set of stores to
+  /// store a widened vector into non-widened memory.
+  /// It takes
+  ///   StChain:    list of chains for the stores we have generated.
+  ///   Chain:      incoming chain for the st vector.
+  ///   BasePtr:    base pointer to store to.
+  ///   SV:         memory disambiguation source value.
+  ///   SVOffset:   memory disambiguation offset.
+  ///   Alignment:  alignment of the memory.
+  ///   isVolatile: volatile store.
+  ///   ValOp:      value to store.
+  ///   StWidth:    width of memory that we want to store.
+  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
+                            SDValue BasePtr, const Value *SV,
+                            int SVOffset, unsigned Alignment,
+                            bool isVolatile, SDValue ValOp,
+                            unsigned StWidth);
+
+  /// Modifies a vector input (widens or narrows) to a vector of WidenVT.  The
+  /// input vector must have the same element type as WidenVT.
+  SDValue ModifyToType(SDValue InOp, MVT WidenVT);
+
+
+  //===--------------------------------------------------------------------===//
   // Generic Splitting: LegalizeTypesGeneric.cpp
   //===--------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5f15fa5..5c34db4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -371,7 +371,7 @@
   case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
   case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
   case ISD::SCALAR_TO_VECTOR:  SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
-  case ISD::LOAD:           SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break;
+  case ISD::LOAD:              SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break;
   case ISD::VECTOR_SHUFFLE:    SplitVecRes_VECTOR_SHUFFLE(N, Lo, Hi); break;
   case ISD::VSETCC:            SplitVecRes_VSETCC(N, Lo, Hi); break;
 
@@ -1062,3 +1062,1034 @@
   assert(false && "Failed to find an appropriate mask type!");
   return SDValue(N, 0);
 }
+
+
+//===----------------------------------------------------------------------===//
+//  Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG);
+        cerr << "\n");
+  SDValue Res = SDValue();  // Widened replacement for result ResNo of N.
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "WidenVectorResult #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to widen the result of this operator!");
+    abort();
+  case ISD::BIT_CONVERT:       Res = WidenVecRes_BIT_CONVERT(N); break;
+  case ISD::BUILD_VECTOR:      Res = WidenVecRes_BUILD_VECTOR(N); break;
+  case ISD::CONCAT_VECTORS:    Res = WidenVecRes_CONCAT_VECTORS(N); break;
+  case ISD::CONVERT_RNDSAT:    Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+  case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
+  case ISD::SCALAR_TO_VECTOR:  Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+  case ISD::SELECT:            Res = WidenVecRes_SELECT(N); break;
+  case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
+  case ISD::UNDEF:             Res = WidenVecRes_UNDEF(N); break;
+  case ISD::VECTOR_SHUFFLE:    Res = WidenVecRes_VECTOR_SHUFFLE(N); break;
+  case ISD::VSETCC:            Res = WidenVecRes_VSETCC(N); break;
+  case ISD::FPOWI:  // The power (operand 1) is a scalar: widen only the base.
+    Res = DAG.getNode(ISD::FPOWI, TLI.getTypeToTransformTo(N->getValueType(0)),
+                      GetWidenedVector(N->getOperand(0)), N->getOperand(1));
+    break;
+
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::FADD:
+  case ISD::FCOPYSIGN:
+  case ISD::FDIV:
+  case ISD::FMUL:
+  case ISD::FPOW:
+  case ISD::FREM:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::MULHS:
+  case ISD::MULHU:
+  case ISD::OR:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::SUB:
+  case ISD::XOR:               Res = WidenVecRes_Binary(N); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:               Res = WidenVecRes_Shift(N); break;
+
+  case ISD::ANY_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SIGN_EXTEND:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::ZERO_EXTEND:
+  case ISD::UINT_TO_FP:        Res = WidenVecRes_Convert(N); break;
+
+  case ISD::BSWAP:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::FABS:
+  case ISD::FCOS:
+  case ISD::FNEG:
+  case ISD::FSIN:
+  case ISD::FSQRT:             Res = WidenVecRes_Unary(N); break;
+  }
+  // If Res is null, the sub-method took care of registering the result.
+  if (Res.getNode())
+    SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+  // Widen both operands, then redo the operation on the wider vector type.
+  SDValue LHS = GetWidenedVector(N->getOperand(0));
+  SDValue RHS = GetWidenedVector(N->getOperand(1));
+  MVT WideVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getNode(N->getOpcode(), WideVT, LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  // The result type is widened; WidenNumElts elements must be produced.
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  MVT InVT = InOp.getValueType();
+  MVT InEltVT = InVT.getVectorElementType();
+  MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+  // (InWidenVT keeps the input element type at the widened element count.)
+  unsigned Opcode = N->getOpcode();
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+  // A widened input may already match the result's element count.
+  if (getTypeAction(InVT) == WidenVector) {
+    InOp = GetWidenedVector(N->getOperand(0));
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts)
+      return DAG.getNode(Opcode, WidenVT, InOp);
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getNode(ISD::UNDEF, InVT);
+      for (unsigned i = 1; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      return DAG.getNode(Opcode, WidenVT,
+                         DAG.getNode(ISD::CONCAT_VECTORS, InWidenVT,
+                         &Ops[0], NumConcat));
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the leading subvector and convert the shortened input vector.
+      return DAG.getNode(Opcode, WidenVT,
+                         DAG.getNode(ISD::EXTRACT_SUBVECTOR, InWidenVT, InOp,
+                                     DAG.getIntPtrConstant(0)));
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  MVT EltVT = WidenVT.getVectorElementType();
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i=0; i < MinElts; ++i)
+    Ops[i] = DAG.getNode(Opcode, EltVT,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, InEltVT, InOp,
+                                     DAG.getIntPtrConstant(i)));
+
+  SDValue UndefVal = DAG.getNode(ISD::UNDEF, EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+  // The value being shifted is widened like any other vector operand.
+  MVT WideVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Val = GetWidenedVector(N->getOperand(0));
+
+  // The shift amount may have its own vector type: take its widened form if
+  // it has one, then adjust it to match the result's element count.
+  SDValue Amt = N->getOperand(1);
+  if (getTypeAction(Amt.getValueType()) == WidenVector)
+    Amt = GetWidenedVector(Amt);
+  MVT AmtVT = Amt.getValueType();
+  MVT WideAmtVT = MVT::getVectorVT(AmtVT.getVectorElementType(),
+                                   WideVT.getVectorNumElements());
+  if (AmtVT != WideAmtVT)
+    Amt = ModifyToType(Amt, WideAmtVT);
+  return DAG.getNode(N->getOpcode(), WideVT, Val, Amt);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+  // Unary op widening: apply the same opcode to the widened operand.
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  MVT InVT = InOp.getValueType();
+  MVT VT = N->getValueType(0);
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  // First see whether legalizing the input already yields WidenVT's bit width.
+  switch (getTypeAction(InVT)) {
+  default:
+    assert(false && "Unknown type action!");
+    break;
+  case Legal:
+    break;
+  case PromoteInteger:
+    // If the InOp is promoted to the same size, convert it.  Otherwise,
+    // fall out of the switch and widen the promoted input.
+    InOp = GetPromotedInteger(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      return DAG.getNode(ISD::BIT_CONVERT, WidenVT, InOp);
+    break;
+  case SoftenFloat:
+  case ExpandInteger:
+  case ExpandFloat:
+  case ScalarizeVector:
+  case SplitVector:
+    break;
+  case WidenVector:
+    // If the InOp is widened to the same size, convert it.  Otherwise, fall
+    // out of the switch and widen the widened input.
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      // The input widens to the same size. Convert to the widened value.
+      return DAG.getNode(ISD::BIT_CONVERT, WidenVT, InOp);
+    break;
+  }
+
+  unsigned WidenSize = WidenVT.getSizeInBits();
+  unsigned InSize = InVT.getSizeInBits();
+  if (WidenSize % InSize == 0) {
+    // Determine new input vector type.  The new input vector type will use
+    // the same element type (if it's a vector) or use the input type as the
+    // element type.  It is the same size as the type to widen to.
+    MVT NewInVT;
+    unsigned NewNumElts = WidenSize / InSize;
+    if (InVT.isVector()) {
+      MVT InEltVT = InVT.getVectorElementType();
+      NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits());
+    } else {
+      NewInVT = MVT::getVectorVT(InVT, NewNumElts);
+    }
+
+    if (TLI.isTypeLegal(NewInVT)) {
+      // Because the result and the input are different vector types, widening
+      // the result could create a legal type but widening the input might make
+      // it an illegal type that might lead to repeatedly splitting the input
+      // and then widening it. To avoid this, we widen the input only if
+      // it results in a legal type.
+      SmallVector<SDValue, 16> Ops(NewNumElts);
+      SDValue UndefVal = DAG.getNode(ISD::UNDEF, InVT);
+      Ops[0] = InOp;
+      for (unsigned i = 1; i < NewNumElts; ++i)
+        Ops[i] = UndefVal;
+
+      SDValue NewVec;
+      if (InVT.isVector())
+        NewVec = DAG.getNode(ISD::CONCAT_VECTORS, NewInVT, &Ops[0], NewNumElts);
+      else
+        NewVec = DAG.getNode(ISD::BUILD_VECTOR, NewInVT, &Ops[0], NewNumElts);
+      return DAG.getNode(ISD::BIT_CONVERT, WidenVT, NewVec);
+    }
+  }
+
+  // This should occur rarely. Lower the bit-convert to a store/load
+  // from the stack. Create the stack frame object.  Make sure it is aligned
+  // for both the source and destination types.
+  SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), InOp, FIPtr, NULL, 0);
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(WidenVT, Store, FIPtr, NULL, 0);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+  // Keep the original operands and pad the widened vector with UNDEF elements.
+  MVT VT = N->getValueType(0);
+  MVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+  NewOps.reserve(WidenNumElts);
+  for (unsigned i = NumElts; i < WidenNumElts; ++i)
+    NewOps.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+  // Widen a CONCAT_VECTORS result.  Operand 0's type is used for all operands.
+  MVT InVT = N->getOperand(0).getValueType();
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  unsigned NumInElts = InVT.getVectorNumElements();
+  unsigned NumOperands = N->getNumOperands();
+  bool InputWidened = false; // Indicates we need to widen the input.
+  if (getTypeAction(InVT) != WidenVector) {
+    if (WidenNumElts % NumInElts == 0) {
+      // Add undef vectors to widen to correct length.
+      unsigned NumConcat = WidenNumElts / NumInElts;
+      SDValue UndefVal = DAG.getNode(ISD::UNDEF, InVT);
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      for (unsigned i=0; i < NumOperands; ++i)
+        Ops[i] = N->getOperand(i);
+      for (unsigned i = NumOperands; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      return DAG.getNode(ISD::CONCAT_VECTORS, WidenVT, &Ops[0], NumConcat);
+    }
+  } else {
+    InputWidened = true;
+    if (WidenVT == TLI.getTypeToTransformTo(InVT)) {
+      // The inputs and the result are widened to the same type.
+      unsigned i;
+      for (i=1; i < NumOperands; ++i)
+        if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+          break;
+      // The scan ends with i == NumOperands iff operands 1.. are all UNDEF.
+      if (i == NumOperands)
+        // Everything but the first operand is an UNDEF so just return the
+        // widened first operand.
+        return GetWidenedVector(N->getOperand(0));
+
+      if (NumOperands == 2) {
+        // Replace concat of two operands with a shuffle.  Only the first
+        // NumInElts lanes of each widened operand are real; the rest is undef.
+        MVT PtrVT = TLI.getPointerTy();
+        SmallVector<SDValue, 16> MaskOps(WidenNumElts,
+                                         DAG.getNode(ISD::UNDEF, PtrVT));
+        for (unsigned i=0; i < NumInElts; ++i) {
+          MaskOps[i] = DAG.getConstant(i, PtrVT);
+          MaskOps[i+NumInElts] = DAG.getConstant(i+WidenNumElts, PtrVT);
+        }
+        SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR,
+                                   MVT::getVectorVT(PtrVT, WidenNumElts),
+                                   &MaskOps[0], WidenNumElts);
+        return DAG.getNode(ISD::VECTOR_SHUFFLE, WidenVT,
+                           GetWidenedVector(N->getOperand(0)),
+                           GetWidenedVector(N->getOperand(1)), Mask);
+      }
+    }
+  }
+  // Fall back to use extracts and build vector.
+  MVT EltVT = WidenVT.getVectorElementType();
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  unsigned Idx = 0;
+  for (unsigned i=0; i < NumOperands; ++i) {
+    SDValue InOp = N->getOperand(i);
+    if (InputWidened)
+      InOp = GetWidenedVector(InOp);
+    for (unsigned j=0; j < NumInElts; ++j)
+        Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InOp,
+                                 DAG.getIntPtrConstant(j));
+  }
+  SDValue UndefVal = DAG.getNode(ISD::UNDEF, EltVT);
+  for (; Idx < WidenNumElts; ++Idx)
+    Ops[Idx] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+  // Widen the result of a CONVERT_RNDSAT, widening or unrolling its input.
+  SDValue InOp  = N->getOperand(0);
+  SDValue RndOp = N->getOperand(3);
+  SDValue SatOp = N->getOperand(4);
+  MVT      WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  MVT InVT = InOp.getValueType();
+  MVT InEltVT = InVT.getVectorElementType();
+  MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+
+  SDValue DTyOp = DAG.getValueType(WidenVT);
+  SDValue STyOp = DAG.getValueType(InWidenVT);
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+  if (getTypeAction(InVT) == WidenVector) {
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts)
+      return DAG.getConvertRndSat(WidenVT, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getNode(ISD::UNDEF, InVT);
+      for (unsigned i = 1; i != NumConcat; ++i) {
+        Ops[i] = UndefVal;
+      }
+      InOp = DAG.getNode(ISD::CONCAT_VECTORS, InWidenVT, &Ops[0], NumConcat);
+      return DAG.getConvertRndSat(WidenVT, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shortened input vector.
+      InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, InWidenVT, InOp,
+                         DAG.getIntPtrConstant(0));
+      return DAG.getConvertRndSat(WidenVT, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  MVT EltVT = WidenVT.getVectorElementType();
+  DTyOp = DAG.getValueType(EltVT);
+  STyOp = DAG.getValueType(InEltVT);
+  // Each element is converted on its own, so the node's result type is EltVT.
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i=0; i < MinElts; ++i) {
+    SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, InEltVT, InOp,
+                                 DAG.getIntPtrConstant(i));
+    Ops[i] = DAG.getConvertRndSat(EltVT, ExtVal, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  SDValue UndefVal = DAG.getNode(ISD::UNDEF, EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+  MVT      VT = N->getValueType(0);
+  MVT      WidenVT = TLI.getTypeToTransformTo(VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  SDValue  InOp = N->getOperand(0);
+  SDValue  Idx  = N->getOperand(1);
+
+  if (getTypeAction(InOp.getValueType()) == WidenVector)
+    InOp = GetWidenedVector(InOp);
+
+  MVT InVT = InOp.getValueType();
+
+  ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
+  if (CIdx) {
+    unsigned IdxVal = CIdx->getZExtValue();
+    // Check if we can just return the input vector after widening.
+    if (IdxVal == 0 && InVT == WidenVT)
+      return InOp;
+    // Check if we can extract from the vector.  NOTE(review): '<' also rejects
+    // a subvector ending exactly at InNumElts -- confirm whether '<=' is meant.
+    unsigned InNumElts = InVT.getVectorNumElements();
+    if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, WidenVT, InOp, Idx);
+  }
+
+  // We could try widening the input to the right length but for now, extract
+  // the original elements, fill the rest with undefs and build a vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  MVT EltVT = VT.getVectorElementType();
+  MVT IdxVT = Idx.getValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned i;
+  if (CIdx) {
+    unsigned IdxVal = CIdx->getZExtValue();
+    for (i=0; i < NumElts; ++i)
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InOp,
+                           DAG.getConstant(IdxVal+i, IdxVT));
+  } else {
+    Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InOp, Idx);
+    for (i=1; i < NumElts; ++i) {
+      SDValue NewIdx = DAG.getNode(ISD::ADD, Idx.getValueType(), Idx,
+                                   DAG.getConstant(i, IdxVT));
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InOp, NewIdx);
+    }
+  }
+
+  SDValue UndefVal = DAG.getNode(ISD::UNDEF, EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0)); // Widened vector operand.
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, InOp.getValueType(), InOp,
+                     N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+  // Widen the result of a vector load and re-route users of its chain.
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+  MVT LdVT    = LD->getMemoryVT();
+  assert(LdVT.isVector() && WidenVT.isVector());
+
+  // Load information
+  SDValue   Chain = LD->getChain();
+  SDValue   BasePtr = LD->getBasePtr();
+  int       SVOffset = LD->getSrcValueOffset();
+  unsigned  Align    = LD->getAlignment();
+  bool      isVolatile = LD->isVolatile();
+  const Value *SV = LD->getSrcValue();
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+
+  SDValue Result;
+  SmallVector<SDValue, 16> LdChain;  // Chain for the series of load
+  if (ExtType != ISD::NON_EXTLOAD) {
+    // For extension loads, we can not play the tricks of chopping legal
+    // vector types and bit cast it to the right type.  Instead, we unroll
+    // the load and build a vector.
+    MVT EltVT = WidenVT.getVectorElementType();
+    MVT LdEltVT = LdVT.getVectorElementType();
+    unsigned NumElts = LdVT.getVectorNumElements();
+    // Load each element and widen
+    unsigned WidenNumElts = WidenVT.getVectorNumElements();
+    SmallVector<SDValue, 16> Ops(WidenNumElts);
+    unsigned Increment = LdEltVT.getSizeInBits() / 8;
+    Ops[0] = DAG.getExtLoad(ExtType, EltVT, Chain, BasePtr, SV, SVOffset,
+                            LdEltVT, isVolatile, Align);
+    LdChain.push_back(Ops[0].getValue(1));
+    unsigned i = 0, Offset = Increment;
+    for (i=1; i < NumElts; ++i, Offset += Increment) {
+      SDValue NewBasePtr = DAG.getNode(ISD::ADD, BasePtr.getValueType(),
+                                       BasePtr, DAG.getIntPtrConstant(Offset));
+      // An element at a nonzero offset is only MinAlign(Align, Offset) aligned.
+      Ops[i] = DAG.getExtLoad(ExtType, EltVT, Chain, NewBasePtr, SV,
+                              SVOffset + Offset, LdEltVT, isVolatile,
+                              MinAlign(Align, Offset));
+      LdChain.push_back(Ops[i].getValue(1));
+    }
+    // Fill the rest with undefs
+    SDValue UndefVal = DAG.getNode(ISD::UNDEF, EltVT);
+    for (; i != WidenNumElts; ++i)
+      Ops[i] = UndefVal;
+
+    Result =  DAG.getNode(ISD::BUILD_VECTOR, WidenVT, &Ops[0], Ops.size());
+  } else {
+    assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+    unsigned int LdWidth = LdVT.getSizeInBits();
+    Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset,
+                                 Align, isVolatile, LdWidth, WidenVT);
+  }
+
+  // If we generate a single load, we can use that for the chain.  Otherwise,
+  // build a token factor node to remember the multiple loads are independent
+  // and chain to that.
+  SDValue NewChain;
+  if (LdChain.size() == 1)
+    NewChain = LdChain[0];
+  else
+    NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &LdChain[0],
+                           LdChain.size());
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), NewChain);
+  return Result;
+}
+/// Re-emit SCALAR_TO_VECTOR on the same scalar operand with the widened type.
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  // A vector condition must be brought to WidenNumElts elements as well.
+  SDValue Cond1 = N->getOperand(0);
+  MVT CondVT = Cond1.getValueType();
+  if (CondVT.isVector()) {
+    MVT CondEltVT = CondVT.getVectorElementType();
+    MVT CondWidenVT =  MVT::getVectorVT(CondEltVT, WidenNumElts);
+    if (getTypeAction(CondVT) == WidenVector)
+      Cond1 = GetWidenedVector(Cond1);
+
+    if (Cond1.getValueType() != CondWidenVT)
+       Cond1 = ModifyToType(Cond1, CondWidenVT);
+  }
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+  assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+  return DAG.getNode(ISD::SELECT, WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+  SDValue InOp1 = GetWidenedVector(N->getOperand(2)); // Widen both value inputs
+  SDValue InOp2 = GetWidenedVector(N->getOperand(3));  
+  return DAG.getNode(ISD::SELECT_CC, InOp1.getValueType(), N->getOperand(0),
+                     N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ return DAG.getNode(ISD::UNDEF, WidenVT); // UNDEF of the widened type.
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) {
+  MVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+  // Adjust mask based on new input vector length: indices into the second
+  // input move up because the first input now has WidenNumElts elements.
+  SDValue Mask = N->getOperand(2);
+  SmallVector<SDValue, 16> MaskOps(WidenNumElts);
+  MVT IdxVT = Mask.getValueType().getVectorElementType();
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Arg = Mask.getOperand(i);
+    if (Arg.getOpcode() == ISD::UNDEF)
+      MaskOps[i] = Arg;
+    else {
+      unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
+      if (Idx < NumElts)
+        MaskOps[i] = Arg;
+      else
+        MaskOps[i] = DAG.getConstant(Idx - NumElts + WidenNumElts, IdxVT);
+    }
+  }
+  for (unsigned i = NumElts; i < WidenNumElts; ++i)
+    MaskOps[i] = DAG.getNode(ISD::UNDEF, IdxVT);
+  SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, 
+                                MVT::getVectorVT(IdxVT, WidenNumElts),
+                                &MaskOps[0], WidenNumElts); 
+
+  return DAG.getNode(ISD::VECTOR_SHUFFLE, WidenVT, InOp1, InOp2, NewMask);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = N->getOperand(0);
+  MVT InVT = InOp1.getValueType();
+  assert(InVT.isVector() && "can not widen non vector type");
+  MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts);
+  InOp1 = GetWidenedVector(InOp1);
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the inputs widen to the same element count as the output.
+  // If not, we will have to unroll it at some point.
+  assert(InOp1.getValueType() == WidenInVT &&
+         InOp2.getValueType() == WidenInVT &&
+         "Input not widened to expected type!");
+  return DAG.getNode(ISD::VSETCC, WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG);
+        cerr << "\n");
+  SDValue Res = SDValue();
+  // Dispatch on the opcode of the node that uses the widened operand.
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "WidenVectorOperand op #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to widen this operator's operand!");
+    abort();
+
+  case ISD::CONCAT_VECTORS:     Res = WidenVecOp_CONCAT_VECTORS(N); break;
+  case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::STORE:              Res = WidenVecOp_STORE(N); break;
+  
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::UINT_TO_FP:         Res = WidenVecOp_Convert(N); break;
+  }
+
+  // If Res is null, the sub-method took care of registering the result.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  // Otherwise Res replaces N's only result and must have the same type.
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+  // Since the result is legal and the input is illegal, it is unlikely
+  // that we can fix the input to a legal type so unroll the convert
+  // into some scalar code and create a nasty build vector.
+  MVT VT = N->getValueType(0);
+  MVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  SDValue InOp = N->getOperand(0);
+  if (getTypeAction(InOp.getValueType()) == WidenVector)
+    InOp = GetWidenedVector(InOp);
+  MVT InVT = InOp.getValueType();
+  MVT InEltVT = InVT.getVectorElementType();
+  // Convert the first NumElts input elements one at a time with N's opcode.
+  unsigned Opcode = N->getOpcode();
+  SmallVector<SDValue, 16> Ops(NumElts);
+  for (unsigned i=0; i < NumElts; ++i)
+    Ops[i] = DAG.getNode(Opcode, EltVT,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, InEltVT, InOp,
+                                     DAG.getIntPtrConstant(i)));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], NumElts); 
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+  // If the input vector is not legal, it is likely that we will not find a
+  // legal vector of the same size. Replace the concatenate vector with a
+  // nasty build vector.
+  MVT VT = N->getValueType(0);
+  MVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(NumElts);
+
+  MVT InVT = N->getOperand(0).getValueType();
+  unsigned NumInElts = InVT.getVectorNumElements();
+  // Take the first NumInElts elements from each (possibly widened) operand.
+  unsigned Idx = 0;
+  unsigned NumOperands = N->getNumOperands();
+  for (unsigned i=0; i < NumOperands; ++i) {
+    SDValue InOp = N->getOperand(i);
+    if (getTypeAction(InOp.getValueType()) == WidenVector)
+      InOp = GetWidenedVector(InOp);
+    for (unsigned j=0; j < NumInElts; ++j)
+      Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InOp,
+                               DAG.getIntPtrConstant(j));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0)); // Widened vector operand.
+  MVT EltVT = InOp.getValueType().getVectorElementType();
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  // We have to widen the value but we want only to store the original
+  // vector type.
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue  Chain = ST->getChain();
+  SDValue  BasePtr = ST->getBasePtr();
+  const    Value *SV = ST->getSrcValue();
+  int      SVOffset = ST->getSrcValueOffset();
+  unsigned Align = ST->getAlignment();
+  bool     isVolatile = ST->isVolatile();
+  SDValue  ValOp = GetWidenedVector(ST->getValue());
+
+  MVT StVT = ST->getMemoryVT();
+  MVT ValVT = ValOp.getValueType();
+  // It must be true that the widened vector type is bigger than where
+  // we need to store.
+  assert(StVT.isVector() && ValOp.getValueType().isVector());
+  assert(StVT.getSizeInBits() < ValOp.getValueType().getSizeInBits());
+
+  SmallVector<SDValue, 16> StChain;
+  if (ST->isTruncatingStore()) {
+    // For truncating stores, we can not play the tricks of chopping legal
+    // vector types and bit cast it to the right type.  Instead, we unroll
+    // the store.
+    MVT StEltVT  = StVT.getVectorElementType();
+    MVT ValEltVT = ValVT.getVectorElementType();
+    // Each store writes a StEltVT-sized element, so advance by that size.
+    unsigned Increment = StEltVT.getSizeInBits() / 8;
+    unsigned NumElts = StVT.getVectorNumElements();
+    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, ValEltVT, ValOp,
+                              DAG.getIntPtrConstant(0));
+    StChain.push_back(DAG.getTruncStore(Chain, EOp, BasePtr, SV,
+                                        SVOffset, StEltVT,
+                                        isVolatile, Align));
+    unsigned Offset = Increment;
+    for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+      SDValue NewBasePtr = DAG.getNode(ISD::ADD, BasePtr.getValueType(),
+                                       BasePtr, DAG.getIntPtrConstant(Offset));
+      SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, ValEltVT, ValOp,
+                                DAG.getIntPtrConstant(i));
+      StChain.push_back(DAG.getTruncStore(Chain, EOp, NewBasePtr, SV,
+                                          SVOffset + Offset, StEltVT,
+                                          isVolatile, MinAlign(Align, Offset)));
+    }
+  } else {
+    assert(StVT.getVectorElementType() == ValVT.getVectorElementType());
+    // Store value
+    GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset,
+                         Align, isVolatile, ValOp, StVT.getSizeInBits());
+  }
+  if (StChain.size() == 1)
+    return StChain[0];
+  else
+    return DAG.getNode(ISD::TokenFactor, MVT::Other,&StChain[0],StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+
+// Utility function to find a vector type and its associated element
+// type from a preferred width and whose vector type must be the same size
+// as the VecVT.
+//  TLI:   Target lowering used to determine legal types.
+//  Width: Preferred width in bits of the element to load or store.
+//  VecVT: Vector value type whose size we must match.
+// Returns NewVecVT and NewEltVT - the vector type and its associated
+// element type.
+static void FindAssocWidenVecType(TargetLowering &TLI, unsigned Width, MVT VecVT,
+                                  MVT& NewEltVT, MVT& NewVecVT) {
+  unsigned EltWidth = Width + 1;
+  if (TLI.isTypeLegal(VecVT)) {
+    // We start with the preferred width, making it a power of 2 and find a 
+    // legal vector type of that width.  If not, we reduce it by another power
+    // of 2.  Since the incoming type is legal, this process will end as a
+    // vector of the smallest loadable type should always be legal.
+    do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewVecVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  } else {
+    // The incoming vector type is illegal and is the result of widening
+    // a vector to a power of 2. In this case, we will use the preferred
+    // width as long as it is a multiple of the incoming vector length.
+    // The legalization process will eventually make this into a legal type
+    // and remove the illegal bit converts (which would turn to stack converts
+    // if they are allowed to exist).
+     do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewEltVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  }
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                                              SDValue      Chain,
+                                              SDValue      BasePtr,
+                                              const Value *SV,
+                                              int          SVOffset,
+                                              unsigned     Alignment,
+                                              bool         isVolatile,
+                                              unsigned     LdWidth,
+                                              MVT          ResType) {
+  // The strategy assumes that we can efficiently load powers of two widths.
+  // The routine chops the vector into the largest power of 2 loads that
+  // can be inserted into a legal vector and then casts the result into the
+  // vector type we want.  This avoids unnecessary stack converts.
+
+  // TODO: If the LdWidth is legal, alignment is the same as the LdWidth, and
+  //       the load is nonvolatile, we can use a wider load for the value.
+
+  // Find the vector type that can load from.
+  MVT NewEltVT, NewVecVT;
+  unsigned NewEltVTWidth;
+  FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+  NewEltVTWidth = NewEltVT.getSizeInBits();
+
+  SDValue LdOp = DAG.getLoad(NewEltVT, Chain, BasePtr, SV, SVOffset, isVolatile,
+                             Alignment);
+  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, NewVecVT, LdOp);
+  LdChain.push_back(LdOp.getValue(1));
+
+  // Check if we can load the element with one instruction
+  if (LdWidth == NewEltVTWidth) {
+    return DAG.getNode(ISD::BIT_CONVERT, ResType, VecOp);
+  }
+
+  unsigned Idx = 1;
+  LdWidth -= NewEltVTWidth;
+  unsigned Offset = 0;
+    
+  while (LdWidth > 0) {
+    unsigned Increment = NewEltVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+
+    if (LdWidth < NewEltVTWidth) {
+      // Our current type we are using is too large, use a smaller size by
+      // using a smaller power of 2
+      unsigned oNewEltVTWidth = NewEltVTWidth;
+      FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+      NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust position and vector position based on new load type
+      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, NewVecVT, VecOp);
+    }
+      
+    SDValue LdOp = DAG.getLoad(NewEltVT, Chain, BasePtr, SV,
+                                 SVOffset+Offset, isVolatile,
+                                 MinAlign(Alignment, Offset));
+    LdChain.push_back(LdOp.getValue(1));
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, NewVecVT, VecOp, LdOp,
+                        DAG.getIntPtrConstant(Idx++));
+    
+    LdWidth -= NewEltVTWidth;
+  }
+
+  return DAG.getNode(ISD::BIT_CONVERT, ResType, VecOp);
+}
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+                                            SDValue   Chain,
+                                            SDValue   BasePtr,
+                                            const Value *SV,
+                                            int         SVOffset,
+                                            unsigned    Alignment,
+                                            bool        isVolatile,
+                                            SDValue     ValOp,
+                                            unsigned    StWidth) {
+  // Breaks the stores into a series of power of 2 width stores.  For any
+  // width, we convert the vector to the vector of element size that we
+  // want to store.  This avoids requiring a stack convert.
+  
+  // Find a width of the element type we can store with
+  MVT WidenVT = ValOp.getValueType();
+  MVT NewEltVT, NewVecVT;
+
+  FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+  unsigned NewEltVTWidth = NewEltVT.getSizeInBits();
+
+  SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, NewVecVT, ValOp);
+  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, NewEltVT, VecOp,
+                            DAG.getIntPtrConstant(0));
+  SDValue StOp = DAG.getStore(Chain, EOp, BasePtr, SV, SVOffset,
+                               isVolatile, Alignment);
+  StChain.push_back(StOp);
+
+  // Check if we are done
+  if (StWidth == NewEltVTWidth) {
+    return;
+  }
+  
+  unsigned Idx = 1;
+  StWidth -= NewEltVTWidth;
+  unsigned Offset = 0;
+    
+  while (StWidth > 0) {
+    unsigned Increment = NewEltVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+                          
+    if (StWidth < NewEltVTWidth) {
+      // Our current type we are using is too large, use a smaller size by
+      // using a smaller power of 2
+      unsigned oNewEltVTWidth = NewEltVTWidth;
+      FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+      NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust position and vector position based on new store type
+      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, NewVecVT, VecOp);
+    }
+    
+    EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, NewEltVT, VecOp,
+                      DAG.getIntPtrConstant(Idx++));
+    StChain.push_back(DAG.getStore(Chain, EOp, BasePtr, SV,
+                                   SVOffset + Offset, isVolatile,
+                                   MinAlign(Alignment, Offset)));
+    StWidth -= NewEltVTWidth;
+  }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT.  The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) {
+  // Note that InOp might have been widened so it might already have
+  // the right width or it might need be narrowed.
+  MVT InVT = InOp.getValueType();
+  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+         "input and widen element type must match");
+
+  // Check if InOp already has the right width.
+  if (InVT == NVT)
+    return InOp;
+
+  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned WidenNumElts = NVT.getVectorNumElements();
+  if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+    unsigned NumConcat = WidenNumElts / InNumElts;
+    SmallVector<SDValue, 16> Ops(NumConcat);
+    SDValue UndefVal = DAG.getNode(ISD::UNDEF, InVT);
+    Ops[0] = InOp;
+    for (unsigned i = 1; i != NumConcat; ++i)
+      Ops[i] = UndefVal;
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, NVT, &Ops[0], NumConcat);
+  }
+  // Narrow with a single subvector extract when the lengths divide evenly.
+  if (WidenNumElts < InNumElts && InNumElts % WidenNumElts == 0)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, NVT, InOp,
+                       DAG.getIntPtrConstant(0));
+
+  // Fall back to extract and build.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  MVT EltVT = NVT.getVectorElementType();
+  unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+  unsigned Idx;
+  for (Idx = 0; Idx < MinNumElts; ++Idx)
+    Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InOp,
+                           DAG.getIntPtrConstant(Idx));
+
+  SDValue UndefVal = DAG.getNode(ISD::UNDEF, EltVT);
+  for ( ; Idx < WidenNumElts; ++Idx)
+    Ops[Idx] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7777e23..f2800bf 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -572,8 +572,32 @@
                                IntermediateVT, NumIntermediates,
                                RegisterVT);
       RegisterTypeForVT[i] = RegisterVT;
-      TransformToType[i] = MVT::Other; // this isn't actually used
-      ValueTypeActions.setTypeAction(VT, Promote);
+      
+      // Determine if there is a legal wider type.
+      bool IsLegalWiderType = false;
+      MVT EltVT = VT.getVectorElementType();
+      unsigned NElts = VT.getVectorNumElements();
+      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        MVT SVT = (MVT::SimpleValueType)nVT;
+        if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
+            SVT.getVectorNumElements() > NElts) {
+          TransformToType[i] = SVT;
+          ValueTypeActions.setTypeAction(VT, Promote);
+          IsLegalWiderType = true;
+          break;
+        }
+      }
+      if (!IsLegalWiderType) {
+        MVT NVT = VT.getPow2VectorType();
+        if (NVT == VT) {
+          // Type is already a power of 2.  The default action is to split.
+          TransformToType[i] = MVT::Other;
+          ValueTypeActions.setTypeAction(VT, Expand);
+        } else {
+          TransformToType[i] = NVT;
+          ValueTypeActions.setTypeAction(VT, Promote);
+        }
+      }
     }
   }
 }
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index 0229f15..fe4af05 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -99,6 +99,9 @@
   case MVT::isVoid:  return "isVoid";
   case MVT::Other:   return "ch";
   case MVT::Flag:    return "flag";
+  case MVT::v2i8:    return "v2i8";
+  case MVT::v4i8:    return "v4i8";
+  case MVT::v2i16:   return "v2i16";
   case MVT::v8i8:    return "v8i8";
   case MVT::v4i16:   return "v4i16";
   case MVT::v2i32:   return "v2i32";
@@ -135,6 +138,9 @@
   case MVT::f80:     return Type::X86_FP80Ty;
   case MVT::f128:    return Type::FP128Ty;
   case MVT::ppcf128: return Type::PPC_FP128Ty;
+  case MVT::v2i8:    return VectorType::get(Type::Int8Ty, 2);
+  case MVT::v4i8:    return VectorType::get(Type::Int8Ty, 4);
+  case MVT::v2i16:    return VectorType::get(Type::Int16Ty, 2);
   case MVT::v8i8:    return VectorType::get(Type::Int8Ty, 8);
   case MVT::v4i16:   return VectorType::get(Type::Int16Ty, 4);
   case MVT::v2i32:   return VectorType::get(Type::Int32Ty, 2);
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 2ff1b19..c6e94a5 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -51,6 +51,9 @@
   case MVT::ppcf128:  return "MVT::ppcf128";
   case MVT::Flag:  return "MVT::Flag";
   case MVT::isVoid:return "MVT::isVoid";
+  case MVT::v2i8:  return "MVT::v2i8";
+  case MVT::v4i8:  return "MVT::v4i8";
+  case MVT::v2i16: return "MVT::v2i16";
   case MVT::v8i8:  return "MVT::v8i8";
   case MVT::v4i16: return "MVT::v4i16";
   case MVT::v2i32: return "MVT::v2i32";
@@ -88,6 +91,9 @@
   case MVT::ppcf128:  return "MVT::ppcf128";
   case MVT::Flag:  return "MVT::Flag";
   case MVT::isVoid:return "MVT::isVoid";
+  case MVT::v2i8:  return "MVT::v2i8";
+  case MVT::v4i8:  return "MVT::v4i8";
+  case MVT::v2i16: return "MVT::v2i16";
   case MVT::v8i8:  return "MVT::v8i8";
   case MVT::v4i16: return "MVT::v4i16";
   case MVT::v2i32: return "MVT::v2i32";
