libbcc

Change-Id: Ieaa3ebd5a38f370752495549f8870b534eeedfc5
diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk
new file mode 100644
index 0000000..eb15a18
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Android.mk
@@ -0,0 +1,48 @@
+LOCAL_PATH:= $(call my-dir)
+
+codegen_selectiondag_SRC_FILES :=	\
+	CallingConvLower.cpp	\
+	DAGCombiner.cpp	\
+	FastISel.cpp	\
+	FunctionLoweringInfo.cpp	\
+	InstrEmitter.cpp	\
+	LegalizeDAG.cpp	\
+	LegalizeFloatTypes.cpp	\
+	LegalizeIntegerTypes.cpp	\
+	LegalizeTypes.cpp	\
+	LegalizeTypesGeneric.cpp	\
+	LegalizeVectorOps.cpp	\
+	LegalizeVectorTypes.cpp	\
+	ScheduleDAGFast.cpp	\
+	ScheduleDAGList.cpp	\
+	ScheduleDAGRRList.cpp	\
+	ScheduleDAGSDNodes.cpp	\
+	SelectionDAG.cpp	\
+	SelectionDAGBuilder.cpp	\
+	SelectionDAGISel.cpp	\
+	SelectionDAGPrinter.cpp	\
+	TargetLowering.cpp  
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMSelectionDAG
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMSelectionDAG
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9189e71..3be6b43 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1064,7 +1064,7 @@
   if (VT.isInteger() && !VT.isVector()) {
     APInt LHSZero, LHSOne;
     APInt RHSZero, RHSOne;
-    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
     DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
 
     if (LHSZero.getBoolValue()) {
@@ -1136,7 +1136,7 @@
   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
   APInt LHSZero, LHSOne;
   APInt RHSZero, RHSOne;
-  APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+  APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
   DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
 
   if (LHSZero.getBoolValue()) {
@@ -1758,7 +1758,7 @@
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   EVT VT = N1.getValueType();
-  unsigned BitWidth = VT.getSizeInBits();
+  unsigned BitWidth = VT.getScalarType().getSizeInBits();
 
   // fold vector ops
   if (VT.isVector()) {
@@ -1786,7 +1786,7 @@
   SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
   if (RAND.getNode() != 0)
     return RAND;
-  // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+  // fold (and (or x, C), D) -> D if (C & D) == D
   if (N1C && N0.getOpcode() == ISD::OR)
     if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
       if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
@@ -1872,16 +1872,17 @@
     EVT MemVT = LN0->getMemoryVT();
     // If we zero all the possible extended bits, then we can turn this into
     // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getValueSizeInBits();
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                                     BitWidth - MemVT.getSizeInBits())) &&
+                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
         ((!LegalOperations && !LN0->isVolatile()) ||
          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
                                        LN0->getChain(), LN0->getBasePtr(),
                                        LN0->getSrcValue(),
                                        LN0->getSrcValueOffset(), MemVT,
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
       AddToWorkList(N);
       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -1894,16 +1895,17 @@
     EVT MemVT = LN0->getMemoryVT();
     // If we zero all the possible extended bits, then we can turn this into
     // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getValueSizeInBits();
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                                     BitWidth - MemVT.getSizeInBits())) &&
+                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
         ((!LegalOperations && !LN0->isVolatile()) ||
          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
                                        LN0->getChain(),
                                        LN0->getBasePtr(), LN0->getSrcValue(),
                                        LN0->getSrcValueOffset(), MemVT,
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
       AddToWorkList(N);
       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -1935,7 +1937,8 @@
             DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
                            LN0->getChain(), LN0->getBasePtr(),
                            LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                           ExtVT, LN0->isVolatile(), LN0->getAlignment());
+                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                           LN0->getAlignment());
           AddToWorkList(N);
           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -1970,7 +1973,8 @@
             DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
                            LN0->getChain(), NewPtr,
                            LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                           ExtVT, LN0->isVolatile(), Alignment);
+                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                           Alignment);
           AddToWorkList(N);
           CombineTo(LN0, Load, Load.getValue(1));
           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -2021,13 +2025,15 @@
   if (ROR.getNode() != 0)
     return ROR;
   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+  // iff (c1 & c2) == 0.
   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
              isa<ConstantSDNode>(N0.getOperand(1))) {
     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
-                       DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
-                                   N0.getOperand(0), N1),
-                       DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+                                     N0.getOperand(0), N1),
+                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
   }
   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
@@ -2750,7 +2756,7 @@
   if (N1C && N0.getOpcode() == ISD::CTLZ &&
       N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
     APInt KnownZero, KnownOne;
-    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
     DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
 
     // If any of the input bits are KnownOne, then the input couldn't be all
@@ -3143,7 +3149,8 @@
                                        LN0->getBasePtr(), LN0->getSrcValue(),
                                        LN0->getSrcValueOffset(),
                                        N0.getValueType(),
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
       CombineTo(N, ExtLoad);
       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                   N0.getValueType(), ExtLoad);
@@ -3185,7 +3192,8 @@
                                        LN0->getChain(),
                                        LN0->getBasePtr(), LN0->getSrcValue(),
                                        LN0->getSrcValueOffset(), MemVT,
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
       CombineTo(N, ExtLoad);
       CombineTo(N0.getNode(),
                 DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
@@ -3315,7 +3323,8 @@
                                        LN0->getBasePtr(), LN0->getSrcValue(),
                                        LN0->getSrcValueOffset(),
                                        N0.getValueType(),
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
       CombineTo(N, ExtLoad);
       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                   N0.getValueType(), ExtLoad);
@@ -3357,7 +3366,8 @@
                                        LN0->getChain(),
                                        LN0->getBasePtr(), LN0->getSrcValue(),
                                        LN0->getSrcValueOffset(), MemVT,
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
       CombineTo(N, ExtLoad);
       CombineTo(N0.getNode(),
                 DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
@@ -3471,7 +3481,8 @@
                                        LN0->getBasePtr(), LN0->getSrcValue(),
                                        LN0->getSrcValueOffset(),
                                        N0.getValueType(),
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
       CombineTo(N, ExtLoad);
       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                   N0.getValueType(), ExtLoad);
@@ -3513,7 +3524,8 @@
                                      VT, LN0->getChain(), LN0->getBasePtr(),
                                      LN0->getSrcValue(),
                                      LN0->getSrcValueOffset(), MemVT,
-                                     LN0->isVolatile(), LN0->getAlignment());
+                                     LN0->isVolatile(), LN0->isNonTemporal(),
+                                     LN0->getAlignment());
     CombineTo(N, ExtLoad);
     CombineTo(N0.getNode(),
               DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
@@ -3636,10 +3648,11 @@
     SDValue Load = (ExtType == ISD::NON_EXTLOAD)
       ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
                     LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
-                    LN0->isVolatile(), NewAlign)
+                    LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
       : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
                        LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
-                       ExtVT, LN0->isVolatile(), NewAlign);
+                       ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                       NewAlign);
 
     // Replace the old load's chain with the new load's chain.
     WorkListRemover DeadNodes(*this);
@@ -3726,7 +3739,8 @@
                                      LN0->getChain(),
                                      LN0->getBasePtr(), LN0->getSrcValue(),
                                      LN0->getSrcValueOffset(), EVT,
-                                     LN0->isVolatile(), LN0->getAlignment());
+                                     LN0->isVolatile(), LN0->isNonTemporal(),
+                                     LN0->getAlignment());
     CombineTo(N, ExtLoad);
     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -3742,7 +3756,8 @@
                                      LN0->getChain(),
                                      LN0->getBasePtr(), LN0->getSrcValue(),
                                      LN0->getSrcValueOffset(), EVT,
-                                     LN0->isVolatile(), LN0->getAlignment());
+                                     LN0->isVolatile(), LN0->isNonTemporal(),
+                                     LN0->getAlignment());
     CombineTo(N, ExtLoad);
     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -3826,7 +3841,7 @@
         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
       return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
                          LD1->getBasePtr(), LD1->getSrcValue(),
-                         LD1->getSrcValueOffset(), false, Align);
+                         LD1->getSrcValueOffset(), false, false, Align);
   }
 
   return SDValue();
@@ -3896,7 +3911,8 @@
       SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
                                  LN0->getBasePtr(),
                                  LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                                 LN0->isVolatile(), OrigAlign);
+                                 LN0->isVolatile(), LN0->isNonTemporal(),
+                                 OrigAlign);
       AddToWorkList(N);
       CombineTo(N0.getNode(),
                 DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
@@ -4492,7 +4508,8 @@
                                      LN0->getBasePtr(), LN0->getSrcValue(),
                                      LN0->getSrcValueOffset(),
                                      N0.getValueType(),
-                                     LN0->isVolatile(), LN0->getAlignment());
+                                     LN0->isVolatile(), LN0->isNonTemporal(),
+                                     LN0->getAlignment());
     CombineTo(N, ExtLoad);
     CombineTo(N0.getNode(),
               DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
@@ -4640,7 +4657,8 @@
             DAG.DeleteNode(Trunc);
           }
           // Replace the uses of SRL with SETCC
-          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+          WorkListRemover DeadNodes(*this);
+          DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
           removeFromWorkList(N1.getNode());
           DAG.DeleteNode(N1.getNode());
           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -4648,6 +4666,56 @@
       }
     }
   }
+  
+  // Transform br(xor(x, y)) -> br(x != y)
+  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+    SDNode *TheXor = N1.getNode();
+    SDValue Op0 = TheXor->getOperand(0);
+    SDValue Op1 = TheXor->getOperand(1);
+    if (Op0.getOpcode() == Op1.getOpcode()) {
+      // Avoid missing important xor optimizations.
+      SDValue Tmp = visitXOR(TheXor);
+      if (Tmp.getNode()) {
+        DEBUG(dbgs() << "\nReplacing.8 ";
+              TheXor->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Tmp.getNode()->dump(&DAG);
+              dbgs() << '\n');
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+        removeFromWorkList(TheXor);
+        DAG.DeleteNode(TheXor);
+        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                           MVT::Other, Chain, Tmp, N2);
+      }
+    }
+
+    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+      bool Equal = false;
+      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+            Op0.getOpcode() == ISD::XOR) {
+          TheXor = Op0.getNode();
+          Equal = true;
+        }
+
+      EVT SetCCVT = N1.getValueType();
+      if (LegalTypes)
+        SetCCVT = TLI.getSetCCResultType(SetCCVT);
+      SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+                                   SetCCVT,
+                                   Op0, Op1,
+                                   Equal ? ISD::SETEQ : ISD::SETNE);
+      // Replace the uses of XOR with SETCC
+      WorkListRemover DeadNodes(*this);
+      DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+      removeFromWorkList(N1.getNode());
+      DAG.DeleteNode(N1.getNode());
+      return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                         MVT::Other, Chain, SetCC, N2);
+    }
+  }
 
   return SDValue();
 }
@@ -4960,7 +5028,7 @@
                               LD->getValueType(0),
                               Chain, Ptr, LD->getSrcValue(),
                               LD->getSrcValueOffset(), LD->getMemoryVT(),
-                              LD->isVolatile(), Align);
+                              LD->isVolatile(), LD->isNonTemporal(), Align);
     }
   }
 
@@ -4997,7 +5065,7 @@
       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
       if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
-        DEBUG(dbgs() << "\nReplacing.6 ";
+        DEBUG(dbgs() << "\nReplacing.7 ";
               N->dump(&DAG);
               dbgs() << "\nWith: ";
               Undef.getNode()->dump(&DAG);
@@ -5042,7 +5110,8 @@
         ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
                                BetterChain, Ptr,
                                LD->getSrcValue(), LD->getSrcValueOffset(),
-                               LD->isVolatile(), LD->getAlignment());
+                               LD->isVolatile(), LD->isNonTemporal(),
+                               LD->getAlignment());
       } else {
         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
                                   LD->getValueType(0),
@@ -5050,6 +5119,7 @@
                                   LD->getSrcValueOffset(),
                                   LD->getMemoryVT(),
                                   LD->isVolatile(),
+                                  LD->isNonTemporal(),
                                   LD->getAlignment());
       }
 
@@ -5149,13 +5219,14 @@
       SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
                                   LD->getChain(), NewPtr,
                                   LD->getSrcValue(), LD->getSrcValueOffset(),
-                                  LD->isVolatile(), NewAlign);
+                                  LD->isVolatile(), LD->isNonTemporal(),
+                                  NewAlign);
       SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
                                    DAG.getConstant(NewImm, NewVT));
       SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
                                    NewVal, NewPtr,
                                    ST->getSrcValue(), ST->getSrcValueOffset(),
-                                   false, NewAlign);
+                                   false, false, NewAlign);
 
       AddToWorkList(NewPtr.getNode());
       AddToWorkList(NewLD.getNode());
@@ -5184,7 +5255,7 @@
         return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
                                  Ptr, ST->getSrcValue(),
                                  ST->getSrcValueOffset(), ST->getMemoryVT(),
-                                 ST->isVolatile(), Align);
+                                 ST->isVolatile(), ST->isNonTemporal(), Align);
     }
   }
 
@@ -5201,7 +5272,8 @@
          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
       return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
                           Ptr, ST->getSrcValue(),
-                          ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign);
+                          ST->getSrcValueOffset(), ST->isVolatile(),
+                          ST->isNonTemporal(), OrigAlign);
   }
 
   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
@@ -5227,7 +5299,7 @@
           return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                               Ptr, ST->getSrcValue(),
                               ST->getSrcValueOffset(), ST->isVolatile(),
-                              ST->getAlignment());
+                              ST->isNonTemporal(), ST->getAlignment());
         }
         break;
       case MVT::f64:
@@ -5239,7 +5311,7 @@
           return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                               Ptr, ST->getSrcValue(),
                               ST->getSrcValueOffset(), ST->isVolatile(),
-                              ST->getAlignment());
+                              ST->isNonTemporal(), ST->getAlignment());
         } else if (!ST->isVolatile() &&
                    TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
           // Many FP stores are not made apparent until after legalize, e.g. for
@@ -5253,18 +5325,21 @@
           int SVOffset = ST->getSrcValueOffset();
           unsigned Alignment = ST->getAlignment();
           bool isVolatile = ST->isVolatile();
+          bool isNonTemporal = ST->isNonTemporal();
 
           SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
                                      Ptr, ST->getSrcValue(),
                                      ST->getSrcValueOffset(),
-                                     isVolatile, ST->getAlignment());
+                                     isVolatile, isNonTemporal,
+                                     ST->getAlignment());
           Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
                             DAG.getConstant(4, Ptr.getValueType()));
           SVOffset += 4;
           Alignment = MinAlign(Alignment, 4U);
           SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
                                      Ptr, ST->getSrcValue(),
-                                     SVOffset, isVolatile, Alignment);
+                                     SVOffset, isVolatile, isNonTemporal,
+                                     Alignment);
           return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                              St0, St1);
         }
@@ -5286,12 +5361,13 @@
       if (ST->isTruncatingStore()) {
         ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
                                       ST->getSrcValue(),ST->getSrcValueOffset(),
-                                      ST->getMemoryVT(),
-                                      ST->isVolatile(), ST->getAlignment());
+                                      ST->getMemoryVT(), ST->isVolatile(),
+                                      ST->isNonTemporal(), ST->getAlignment());
       } else {
         ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
                                  ST->getSrcValue(), ST->getSrcValueOffset(),
-                                 ST->isVolatile(), ST->getAlignment());
+                                 ST->isVolatile(), ST->isNonTemporal(),
+                                 ST->getAlignment());
       }
 
       // Create token to keep both nodes around.
@@ -5325,7 +5401,8 @@
       return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
                                Ptr, ST->getSrcValue(),
                                ST->getSrcValueOffset(), ST->getMemoryVT(),
-                               ST->isVolatile(), ST->getAlignment());
+                               ST->isVolatile(), ST->isNonTemporal(),
+                               ST->getAlignment());
 
     // Otherwise, see if we can simplify the operation with
     // SimplifyDemandedBits, which only works if the value has a single use.
@@ -5358,7 +5435,8 @@
     return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
                              Ptr, ST->getSrcValue(),
                              ST->getSrcValueOffset(), ST->getMemoryVT(),
-                             ST->isVolatile(), ST->getAlignment());
+                             ST->isVolatile(), ST->isNonTemporal(),
+                             ST->getAlignment());
   }
 
   return ReduceLoadOpStoreWidth(N);
@@ -5503,7 +5581,7 @@
 
     return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
                        LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                       LN0->isVolatile(), Align);
+                       LN0->isVolatile(), LN0->isNonTemporal(), Align);
   }
 
   return SDValue();
@@ -5883,6 +5961,7 @@
                                LLD->getChain(),
                                Addr, 0, 0,
                                LLD->isVolatile(),
+                               LLD->isNonTemporal(),
                                LLD->getAlignment());
           } else {
             Load = DAG.getExtLoad(LLD->getExtensionType(),
@@ -5891,6 +5970,7 @@
                                   LLD->getChain(), Addr, 0, 0,
                                   LLD->getMemoryVT(),
                                   LLD->isVolatile(),
+                                  LLD->isNonTemporal(),
                                   LLD->getAlignment());
           }
 
@@ -5998,7 +6078,7 @@
                             CstOffset);
         return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                            PseudoSourceValue::getConstantPool(), 0, false,
-                           Alignment);
+                           false, Alignment);
 
       }
     }  
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 35ef5b7..1d76c7c 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -350,6 +350,34 @@
     (void)TargetSelectInstruction(cast<Instruction>(I));
     return true;
   }
+  case Intrinsic::dbg_value: {
+    // This requires target support, but right now X86 is the only Fast target.
+    DbgValueInst *DI = cast<DbgValueInst>(I);
+    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+    Value *V = DI->getValue();
+    if (!V) {
+      // Currently the optimizer can produce this; insert an undef to
+      // help debugging.  Probably the optimizer should not do this.
+      BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
+                                     addMetadata(DI->getVariable());
+    } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+      BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()).
+                                     addMetadata(DI->getVariable());
+    } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+      BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()).
+                                     addMetadata(DI->getVariable());
+    } else if (unsigned Reg = lookUpRegForValue(V)) {
+      BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()).
+                                     addMetadata(DI->getVariable());
+    } else {
+      // We can't yet handle anything else here because it would require
+      // generating code, thus altering codegen because of debug info.
+      // Insert an undef so we can see what we dropped.
+      BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
+                                     addMetadata(DI->getVariable());
+    }     
+    return true;
+  }
   case Intrinsic::eh_exception: {
     EVT VT = TLI.getValueType(I->getType());
     switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 02fe85d..625de11 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -15,6 +15,7 @@
 
 #define DEBUG_TYPE "instr-emitter"
 #include "InstrEmitter.h"
+#include "SDDbgValue.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -497,6 +498,56 @@
   assert(isNew && "Node emitted out of order - early");
 }
 
+/// EmitDbgValue - Generate any debug info that refers to this Node.  Constant
+/// dbg_value is not handled here.
+void
+InstrEmitter::EmitDbgValue(SDNode *Node,
+                           DenseMap<SDValue, unsigned> &VRBaseMap,
+                           SDDbgValue *sd) {
+  if (!Node->getHasDebugValue())
+    return;
+  if (!sd)
+    return;
+  unsigned VReg = getVR(SDValue(sd->getSDNode(), sd->getResNo()), VRBaseMap);
+  const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+  DebugLoc DL = sd->getDebugLoc();
+  MachineInstr *MI;
+  if (VReg) {
+    MI = BuildMI(*MF, DL, II).addReg(VReg, RegState::Debug).
+                              addImm(sd->getOffset()).
+                              addMetadata(sd->getMDPtr());
+  } else {
+    // Insert an Undef so we can see what we dropped.
+    MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()).
+                                    addMetadata(sd->getMDPtr());
+  }
+  MBB->insert(InsertPos, MI);
+}
+
+/// EmitDbgValue - Generate constant debug info.  No SDNode is involved.
+void
+InstrEmitter::EmitDbgValue(SDDbgValue *sd) {
+  if (!sd)
+    return;
+  const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+  DebugLoc DL = sd->getDebugLoc();
+  MachineInstr *MI;
+  Value *V = sd->getConst();
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()).
+                                   addImm(sd->getOffset()).
+                                   addMetadata(sd->getMDPtr());
+  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+    MI = BuildMI(*MF, DL, II).addFPImm(CF).addImm(sd->getOffset()).
+                                   addMetadata(sd->getMDPtr());
+  } else {
+    // Insert an Undef so we can see what we dropped.
+    MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()).
+                                    addMetadata(sd->getMDPtr());
+  }
+  MBB->insert(InsertPos, MI);
+}
+
 /// EmitNode - Generate machine code for a node and needed dependencies.
 ///
 void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 91817e4..4fe9f19 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -23,6 +23,7 @@
 namespace llvm {
 
 class TargetInstrDesc;
+class SDDbgValue;
 
 class InstrEmitter {
   MachineFunction *MF;
@@ -97,6 +98,16 @@
   /// MachineInstr.
   static unsigned CountOperands(SDNode *Node);
 
+  /// EmitDbgValue - Generate any debug info that refers to this Node.  Constant
+  /// dbg_value is not handled here.
+  void EmitDbgValue(SDNode *Node,
+                    DenseMap<SDValue, unsigned> &VRBaseMap,
+                    SDDbgValue* sd);
+
+
+  /// EmitDbgValue - Generate a constant DBG_VALUE.  No node is involved.
+  void EmitDbgValue(SDDbgValue* sd);
+
   /// EmitNode - Generate machine code for a node and needed dependencies.
   ///
   void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 78e6e4e..f498263 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -377,9 +377,10 @@
     return DAG.getExtLoad(ISD::EXTLOAD, dl,
                           OrigVT, DAG.getEntryNode(),
                           CPIdx, PseudoSourceValue::getConstantPool(),
-                          0, VT, false, Alignment);
+                          0, VT, false, false, Alignment);
   return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
-                     PseudoSourceValue::getConstantPool(), 0, false, Alignment);
+                     PseudoSourceValue::getConstantPool(), 0, false, false,
+                     Alignment);
 }
 
 /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
@@ -402,7 +403,8 @@
       // FIXME: Does not handle truncating floating point stores!
       SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
       return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
-                          SVOffset, ST->isVolatile(), Alignment);
+                          SVOffset, ST->isVolatile(), ST->isNonTemporal(),
+                          Alignment);
     } else {
       // Do a (aligned) store to a stack slot, then copy from the stack slot
       // to the final destination using (unaligned) integer loads and stores.
@@ -418,7 +420,8 @@
 
       // Perform the original store, only redirected to the stack slot.
       SDValue Store = DAG.getTruncStore(Chain, dl,
-                                        Val, StackPtr, NULL, 0, StoredVT);
+                                        Val, StackPtr, NULL, 0, StoredVT,
+                                        false, false, 0);
       SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
       SmallVector<SDValue, 8> Stores;
       unsigned Offset = 0;
@@ -426,11 +429,12 @@
       // Do all but one copies using the full register width.
       for (unsigned i = 1; i < NumRegs; i++) {
         // Load one integer register's worth from the stack slot.
-        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0);
+        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0,
+                                   false, false, 0);
         // Store it to the final location.  Remember the store.
         Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                       ST->getSrcValue(), SVOffset + Offset,
-                                      ST->isVolatile(),
+                                      ST->isVolatile(), ST->isNonTemporal(),
                                       MinAlign(ST->getAlignment(), Offset)));
         // Increment the pointers.
         Offset += RegBytes;
@@ -446,11 +450,12 @@
 
       // Load from the stack slot.
       SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
-                                    NULL, 0, MemVT);
+                                    NULL, 0, MemVT, false, false, 0);
 
       Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                                          ST->getSrcValue(), SVOffset + Offset,
                                          MemVT, ST->isVolatile(),
+                                         ST->isNonTemporal(),
                                          MinAlign(ST->getAlignment(), Offset)));
       // The order of the stores doesn't matter - say it with a TokenFactor.
       return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
@@ -474,13 +479,14 @@
   SDValue Store1, Store2;
   Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
                              ST->getSrcValue(), SVOffset, NewStoredVT,
-                             ST->isVolatile(), Alignment);
+                             ST->isVolatile(), ST->isNonTemporal(), Alignment);
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                     DAG.getConstant(IncrementSize, TLI.getPointerTy()));
   Alignment = MinAlign(Alignment, IncrementSize);
   Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
                              ST->getSrcValue(), SVOffset + IncrementSize,
-                             NewStoredVT, ST->isVolatile(), Alignment);
+                             NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
+                             Alignment);
 
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
 }
@@ -502,7 +508,7 @@
       // then bitconvert to floating point or vector.
       SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
                                     SVOffset, LD->isVolatile(),
-                                    LD->getAlignment());
+                                    LD->isNonTemporal(), LD->getAlignment());
       SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
       if (VT.isFloatingPoint() && LoadedVT != VT)
         Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
@@ -530,10 +536,11 @@
         // Load one integer register's worth from the original location.
         SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
                                    SVOffset + Offset, LD->isVolatile(),
+                                   LD->isNonTemporal(),
                                    MinAlign(LD->getAlignment(), Offset));
         // Follow the load with a store to the stack slot.  Remember the store.
         Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
-                                      NULL, 0));
+                                      NULL, 0, false, false, 0));
         // Increment the pointers.
         Offset += RegBytes;
         Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
@@ -546,12 +553,13 @@
       SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                                     LD->getSrcValue(), SVOffset + Offset,
                                     MemVT, LD->isVolatile(),
+                                    LD->isNonTemporal(),
                                     MinAlign(LD->getAlignment(), Offset));
       // Follow the load with a store to the stack slot.  Remember the store.
       // On big-endian machines this requires a truncating store to ensure
       // that the bits end up in the right place.
       Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
-                                         NULL, 0, MemVT));
+                                         NULL, 0, MemVT, false, false, 0));
 
       // The order of the stores doesn't matter - say it with a TokenFactor.
       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
@@ -559,7 +567,7 @@
 
       // Finally, perform the original load only redirected to the stack slot.
       Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
-                            NULL, 0, LoadedVT);
+                            NULL, 0, LoadedVT, false, false, 0);
 
       // Callers expect a MERGE_VALUES node.
       SDValue Ops[] = { Load, TF };
@@ -588,20 +596,22 @@
   SDValue Lo, Hi;
   if (TLI.isLittleEndian()) {
     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
-                        SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+                        SVOffset, NewLoadedVT, LD->isVolatile(),
+                        LD->isNonTemporal(), Alignment);
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                       DAG.getConstant(IncrementSize, TLI.getPointerTy()));
     Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
                         SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
-                        MinAlign(Alignment, IncrementSize));
+                        LD->isNonTemporal(), MinAlign(Alignment, IncrementSize));
   } else {
     Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
-                        SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+                        SVOffset, NewLoadedVT, LD->isVolatile(),
+                        LD->isNonTemporal(), Alignment);
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                       DAG.getConstant(IncrementSize, TLI.getPointerTy()));
     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
                         SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
-                        MinAlign(Alignment, IncrementSize));
+                        LD->isNonTemporal(), MinAlign(Alignment, IncrementSize));
   }
 
   // aggregate the two parts
@@ -643,7 +653,8 @@
 
   // Store the vector.
   SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
-                            PseudoSourceValue::getFixedStack(SPFI), 0);
+                            PseudoSourceValue::getFixedStack(SPFI), 0,
+                            false, false, 0);
 
   // Truncate or zero extend offset to target pointer type.
   unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -654,10 +665,12 @@
   SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
   // Store the scalar value.
   Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
-                         PseudoSourceValue::getFixedStack(SPFI), 0, EltVT);
+                         PseudoSourceValue::getFixedStack(SPFI), 0, EltVT,
+                         false, false, 0);
   // Load the updated vector.
   return DAG.getLoad(VT, dl, Ch, StackPtr,
-                     PseudoSourceValue::getFixedStack(SPFI), 0);
+                     PseudoSourceValue::getFixedStack(SPFI), 0,
+                     false, false, 0);
 }
 
 
@@ -702,6 +715,7 @@
   int SVOffset = ST->getSrcValueOffset();
   unsigned Alignment = ST->getAlignment();
   bool isVolatile = ST->isVolatile();
+  bool isNonTemporal = ST->isNonTemporal();
   DebugLoc dl = ST->getDebugLoc();
   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
     if (CFP->getValueType(0) == MVT::f32 &&
@@ -710,14 +724,14 @@
                                       bitcastToAPInt().zextOrTrunc(32),
                               MVT::i32);
       return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                          SVOffset, isVolatile, Alignment);
+                          SVOffset, isVolatile, isNonTemporal, Alignment);
     } else if (CFP->getValueType(0) == MVT::f64) {
       // If this target supports 64-bit registers, do a single 64-bit store.
       if (getTypeAction(MVT::i64) == Legal) {
         Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                   zextOrTrunc(64), MVT::i64);
         return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                            SVOffset, isVolatile, Alignment);
+                            SVOffset, isVolatile, isNonTemporal, Alignment);
       } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
         // Otherwise, if the target supports 32-bit registers, use 2 32-bit
         // stores.  If the target supports neither 32- nor 64-bits, this
@@ -728,11 +742,11 @@
         if (TLI.isBigEndian()) std::swap(Lo, Hi);
 
         Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
-                          SVOffset, isVolatile, Alignment);
+                          SVOffset, isVolatile, isNonTemporal, Alignment);
         Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
                             DAG.getIntPtrConstant(4));
         Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
-                          isVolatile, MinAlign(Alignment, 4U));
+                          isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
 
         return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
       }
@@ -1108,7 +1122,8 @@
 
         Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
                            LD->getSrcValueOffset(),
-                           LD->isVolatile(), LD->getAlignment());
+                           LD->isVolatile(), LD->isNonTemporal(),
+                           LD->getAlignment());
         Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
         Tmp4 = LegalizeOp(Tmp1.getValue(1));
         break;
@@ -1125,6 +1140,7 @@
       int SVOffset = LD->getSrcValueOffset();
       unsigned Alignment = LD->getAlignment();
       bool isVolatile = LD->isVolatile();
+      bool isNonTemporal = LD->isNonTemporal();
 
       if (SrcWidth != SrcVT.getStoreSizeInBits() &&
           // Some targets pretend to have an i1 loading operation, and actually
@@ -1150,7 +1166,7 @@
 
         Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
                                 Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
-                                NVT, isVolatile, Alignment);
+                                NVT, isVolatile, isNonTemporal, Alignment);
 
         Ch = Result.getValue(1); // The chain.
 
@@ -1187,7 +1203,7 @@
           Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
                               Node->getValueType(0), Tmp1, Tmp2,
                               LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
-                              Alignment);
+                              isNonTemporal, Alignment);
 
           // Load the remaining ExtraWidth bits.
           IncrementSize = RoundWidth / 8;
@@ -1195,7 +1211,7 @@
                              DAG.getIntPtrConstant(IncrementSize));
           Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
                               LD->getSrcValue(), SVOffset + IncrementSize,
-                              ExtraVT, isVolatile,
+                              ExtraVT, isVolatile, isNonTemporal,
                               MinAlign(Alignment, IncrementSize));
 
           // Build a factor node to remember that this load is independent of the
@@ -1215,7 +1231,7 @@
           // Load the top RoundWidth bits.
           Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
                               LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
-                              Alignment);
+                              isNonTemporal, Alignment);
 
           // Load the remaining ExtraWidth bits.
           IncrementSize = RoundWidth / 8;
@@ -1224,7 +1240,7 @@
           Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
                               Node->getValueType(0), Tmp1, Tmp2,
                               LD->getSrcValue(), SVOffset + IncrementSize,
-                              ExtraVT, isVolatile,
+                              ExtraVT, isVolatile, isNonTemporal,
                               MinAlign(Alignment, IncrementSize));
 
           // Build a factor node to remember that this load is independent of the
@@ -1284,7 +1300,8 @@
               (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) {
             SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
                                        LD->getSrcValueOffset(),
-                                       LD->isVolatile(), LD->getAlignment());
+                                       LD->isVolatile(), LD->isNonTemporal(),
+                                       LD->getAlignment());
             Result = DAG.getNode(ISD::FP_EXTEND, dl,
                                  Node->getValueType(0), Load);
             Tmp1 = LegalizeOp(Result);  // Relegalize new nodes.
@@ -1297,7 +1314,8 @@
           Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
                                   Tmp1, Tmp2, LD->getSrcValue(),
                                   LD->getSrcValueOffset(), SrcVT,
-                                  LD->isVolatile(), LD->getAlignment());
+                                  LD->isVolatile(), LD->isNonTemporal(),
+                                  LD->getAlignment());
           SDValue ValRes;
           if (ExtType == ISD::SEXTLOAD)
             ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
@@ -1325,6 +1343,7 @@
     int SVOffset = ST->getSrcValueOffset();
     unsigned Alignment = ST->getAlignment();
     bool isVolatile = ST->isVolatile();
+    bool isNonTemporal = ST->isNonTemporal();
 
     if (!ST->isTruncatingStore()) {
       if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
@@ -1361,7 +1380,7 @@
                              TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
           Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
                                 ST->getSrcValue(), SVOffset, isVolatile,
-                                Alignment);
+                                isNonTemporal, Alignment);
           break;
         }
         break;
@@ -1379,7 +1398,8 @@
         EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits());
         Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
         Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                                   SVOffset, NVT, isVolatile, Alignment);
+                                   SVOffset, NVT, isVolatile, isNonTemporal,
+                                   Alignment);
       } else if (StWidth & (StWidth - 1)) {
         // If not storing a power-of-2 number of bits, expand as two stores.
         assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1399,7 +1419,7 @@
           // Store the bottom RoundWidth bits.
           Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
                                  SVOffset, RoundVT,
-                                 isVolatile, Alignment);
+                                 isVolatile, isNonTemporal, Alignment);
 
           // Store the remaining ExtraWidth bits.
           IncrementSize = RoundWidth / 8;
@@ -1409,6 +1429,7 @@
                            DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
           Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
                                  SVOffset + IncrementSize, ExtraVT, isVolatile,
+                                 isNonTemporal,
                                  MinAlign(Alignment, IncrementSize));
         } else {
           // Big endian - avoid unaligned stores.
@@ -1417,7 +1438,8 @@
           Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
                            DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
           Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
-                                 SVOffset, RoundVT, isVolatile, Alignment);
+                                 SVOffset, RoundVT, isVolatile, isNonTemporal,
+                                 Alignment);
 
           // Store the remaining ExtraWidth bits.
           IncrementSize = RoundWidth / 8;
@@ -1425,6 +1447,7 @@
                              DAG.getIntPtrConstant(IncrementSize));
           Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
                                  SVOffset + IncrementSize, ExtraVT, isVolatile,
+                                 isNonTemporal,
                                  MinAlign(Alignment, IncrementSize));
         }
 
@@ -1457,7 +1480,8 @@
           assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
           Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
           Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                                SVOffset, isVolatile, Alignment);
+                                SVOffset, isVolatile, isNonTemporal,
+                                Alignment);
           break;
         }
       }
@@ -1484,7 +1508,8 @@
   DebugLoc dl = Op.getDebugLoc();
   // Store the value to a temporary stack slot, then LOAD the returned part.
   SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
-  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
+                            false, false, 0);
 
   // Add the offset to the index.
   unsigned EltSize =
@@ -1500,10 +1525,12 @@
   StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
 
   if (Op.getValueType().isVector())
-    return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
+    return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0,
+                       false, false, 0);
   else
     return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
-                          NULL, 0, Vec.getValueType().getVectorElementType());
+                          NULL, 0, Vec.getValueType().getVectorElementType(),
+                          false, false, 0);
 }
 
 SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1512,7 +1539,6 @@
   // the result as a vector.
   // Create the stack frame object.
   EVT VT = Node->getValueType(0);
-  EVT OpVT = Node->getOperand(0).getValueType();
   EVT EltVT = VT.getVectorElementType();
   DebugLoc dl = Node->getDebugLoc();
   SDValue FIPtr = DAG.CreateStackTemporary(VT);
@@ -1532,13 +1558,16 @@
     SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
     Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
 
-    // If EltVT smaller than OpVT, only store the bits necessary.
-    if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) {
+    // If the destination vector element type is narrower than the source
+    // element type, only store the bits necessary.
+    if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
       Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
-                          Node->getOperand(i), Idx, SV, Offset, EltVT));
+                                         Node->getOperand(i), Idx, SV, Offset,
+                                         EltVT, false, false, 0));
     } else
       Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, 
-                                    Node->getOperand(i), Idx, SV, Offset));
+                                    Node->getOperand(i), Idx, SV, Offset,
+                                    false, false, 0));
   }
 
   SDValue StoreChain;
@@ -1549,7 +1578,7 @@
     StoreChain = DAG.getEntryNode();
 
   // Result is a load from the stack slot.
-  return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+  return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0);
 }
 
 SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1572,12 +1601,14 @@
     SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType());
     SDValue StorePtr = StackPtr, LoadPtr = StackPtr;
     SDValue Ch =
-        DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0);
+      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0,
+                   false, false, 0);
     if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian())
       LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
                             LoadPtr, DAG.getIntPtrConstant(4));
     SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(),
-                              Ch, LoadPtr, NULL, 0, MVT::i32);
+                             Ch, LoadPtr, NULL, 0, MVT::i32,
+                             false, false, 0);
   }
   SignBit =
       DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
@@ -1701,20 +1732,21 @@
 
   if (SrcSize > SlotSize)
     Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
-                              SV, 0, SlotVT, false, SrcAlign);
+                              SV, 0, SlotVT, false, false, SrcAlign);
   else {
     assert(SrcSize == SlotSize && "Invalid store");
     Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
-                         SV, 0, false, SrcAlign);
+                         SV, 0, false, false, SrcAlign);
   }
 
   // Result is a load from the stack slot.
   if (SlotSize == DestSize)
-    return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+    return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false,
+                       DestAlign);
 
   assert(SlotSize < DestSize && "Unknown extension!");
   return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
-                        false, DestAlign);
+                        false, false, DestAlign);
 }
 
 SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1729,9 +1761,11 @@
   SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
                                  StackPtr,
                                  PseudoSourceValue::getFixedStack(SPFI), 0,
-                                 Node->getValueType(0).getVectorElementType());
+                                 Node->getValueType(0).getVectorElementType(),
+                                 false, false, 0);
   return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
-                     PseudoSourceValue::getFixedStack(SPFI), 0);
+                     PseudoSourceValue::getFixedStack(SPFI), 0,
+                     false, false, 0);
 }
 
 
@@ -1805,7 +1839,7 @@
     unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
     return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                        PseudoSourceValue::getConstantPool(), 0,
-                       false, Alignment);
+                       false, false, Alignment);
   }
 
   if (!MoreThanTwoValues) {
@@ -1865,8 +1899,7 @@
     TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                     0, TLI.getLibcallCallingConv(LC), false,
                     /*isReturnValueUsed=*/true,
-                    Callee, Args, DAG,
-                    Node->getDebugLoc(), DAG.GetOrdering(Node));
+                    Callee, Args, DAG, Node->getDebugLoc());
 
   // Legalize the call sequence, starting with the chain.  This will advance
   // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
@@ -1943,13 +1976,16 @@
     }
     // store the lo of the constructed double - based on integer input
     SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
-                                  Op0Mapped, Lo, NULL, 0);
+                                  Op0Mapped, Lo, NULL, 0,
+                                  false, false, 0);
     // initial hi portion of constructed double
     SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
     // store the hi of the constructed double - biased exponent
-    SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0);
+    SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0,
+                                false, false, 0);
     // load the constructed double
-    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0);
+    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0,
+                               false, false, 0);
     // FP constant to bias correct the final result
     SDValue Bias = DAG.getConstantFP(isSigned ?
                                      BitsToDouble(0x4330000080000000ULL) :
@@ -1972,6 +2008,31 @@
     return Result;
   }
   assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+
+  // Implementation of unsigned i64 to f64 following the algorithm in
+  // __floatundidf in compiler_rt. This implementation has the advantage
+  // of performing rounding correctly, both in the default rounding mode
+  // and in all alternate rounding modes.
+  // TODO: Generalize this for use with other types.
+  if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+    SDValue TwoP52 =
+      DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+    SDValue TwoP84PlusTwoP52 =
+      DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+    SDValue TwoP84 =
+      DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+
+    SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+    SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+                             DAG.getConstant(32, MVT::i64));
+    SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+    SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+    SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
+    SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
+    SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52);
+    return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+  }
+
   SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
 
   SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
@@ -2004,13 +2065,13 @@
   if (DestVT == MVT::f32)
     FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
                              PseudoSourceValue::getConstantPool(), 0,
-                             false, Alignment);
+                             false, false, Alignment);
   else {
     FudgeInReg =
       LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
                                 DAG.getEntryNode(), CPIdx,
                                 PseudoSourceValue::getConstantPool(), 0,
-                                MVT::f32, false, Alignment));
+                                MVT::f32, false, false, Alignment));
   }
 
   return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
@@ -2271,7 +2332,7 @@
                       false, false, false, false, 0, CallingConv::C, false,
                       /*isReturnValueUsed=*/true,
                       DAG.getExternalSymbol("abort", TLI.getPointerTy()),
-                      Args, DAG, dl, DAG.GetOrdering(Node));
+                      Args, DAG, dl);
     Results.push_back(CallResult.second);
     break;
   }
@@ -2350,16 +2411,19 @@
     EVT VT = Node->getValueType(0);
     Tmp1 = Node->getOperand(0);
     Tmp2 = Node->getOperand(1);
-    SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0);
+    SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
+                                 false, false, 0);
     // Increment the pointer, VAList, to the next vaarg
     Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
                        DAG.getConstant(TLI.getTargetData()->
                                        getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
                                        TLI.getPointerTy()));
     // Store the incremented VAList to the legalized pointer
-    Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0);
+    Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0,
+                        false, false, 0);
     // Load the actual argument out of the pointer VAList
-    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0));
+    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0,
+                                  false, false, 0));
     Results.push_back(Results[0].getValue(1));
     break;
   }
@@ -2369,8 +2433,9 @@
     const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
     const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
     Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
-                       Node->getOperand(2), VS, 0);
-    Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0);
+                       Node->getOperand(2), VS, 0, false, false, 0);
+    Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0,
+                        false, false, 0);
     Results.push_back(Tmp1);
     break;
   }
@@ -2827,7 +2892,8 @@
 
     EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
     SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
-                                PseudoSourceValue::getJumpTable(), 0, MemVT);
+                                PseudoSourceValue::getJumpTable(), 0, MemVT,
+                                false, false, 0);
     Addr = LD;
     if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
       // For PIC, the sequence is:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4f0fce7..35a7c7c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -444,7 +444,7 @@
     NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(),
                        NVT, L->getChain(), L->getBasePtr(), L->getOffset(),
                        L->getSrcValue(), L->getSrcValueOffset(), NVT,
-                       L->isVolatile(), L->getAlignment());
+                       L->isVolatile(), L->isNonTemporal(), L->getAlignment());
     // Legalized the chain result - switch anything that used the old chain to
     // use the new one.
     ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -456,8 +456,8 @@
                      L->getMemoryVT(), L->getChain(),
                      L->getBasePtr(), L->getOffset(),
                      L->getSrcValue(), L->getSrcValueOffset(),
-                     L->getMemoryVT(),
-                     L->isVolatile(), L->getAlignment());
+                     L->getMemoryVT(), L->isVolatile(),
+                     L->isNonTemporal(), L->getAlignment());
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -755,7 +755,8 @@
 
   return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
                       ST->getSrcValue(), ST->getSrcValueOffset(),
-                      ST->isVolatile(), ST->getAlignment());
+                      ST->isVolatile(), ST->isNonTemporal(),
+                      ST->getAlignment());
 }
 
 
@@ -1073,8 +1074,8 @@
 
   Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
                       LD->getSrcValue(), LD->getSrcValueOffset(),
-                      LD->getMemoryVT(),
-                      LD->isVolatile(), LD->getAlignment());
+                      LD->getMemoryVT(), LD->isVolatile(),
+                      LD->isNonTemporal(), LD->getAlignment());
 
   // Remember the chain.
   Chain = Hi.getValue(1);
@@ -1382,6 +1383,6 @@
 
   return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
                            ST->getSrcValue(), ST->getSrcValueOffset(),
-                           ST->getMemoryVT(),
-                           ST->isVolatile(), ST->getAlignment());
+                           ST->getMemoryVT(), ST->isVolatile(),
+                           ST->isNonTemporal(), ST->getAlignment());
 }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9932cf4..81f28ad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -359,7 +359,7 @@
   SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
                                N->getSrcValue(), N->getSrcValueOffset(),
                                N->getMemoryVT(), N->isVolatile(),
-                               N->getAlignment());
+                               N->isNonTemporal(), N->getAlignment());
 
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
@@ -873,6 +873,7 @@
   int SVOffset = N->getSrcValueOffset();
   unsigned Alignment = N->getAlignment();
   bool isVolatile = N->isVolatile();
+  bool isNonTemporal = N->isNonTemporal();
   DebugLoc dl = N->getDebugLoc();
 
   SDValue Val = GetPromotedInteger(N->getValue());  // Get promoted value.
@@ -880,7 +881,7 @@
   // Truncate the value and store the result.
   return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
                            SVOffset, N->getMemoryVT(),
-                           isVolatile, Alignment);
+                           isVolatile, isNonTemporal, Alignment);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
@@ -1079,8 +1080,8 @@
   SDValue Amt = N->getOperand(1);
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   EVT ShTy = Amt.getValueType();
-  unsigned ShBits = ShTy.getSizeInBits();
-  unsigned NVTBits = NVT.getSizeInBits();
+  unsigned ShBits = ShTy.getScalarType().getSizeInBits();
+  unsigned NVTBits = NVT.getScalarType().getSizeInBits();
   assert(isPowerOf2_32(NVTBits) &&
          "Expanded integer type size not a power of two!");
   DebugLoc dl = N->getDebugLoc();
@@ -1500,6 +1501,7 @@
   int SVOffset = N->getSrcValueOffset();
   unsigned Alignment = N->getAlignment();
   bool isVolatile = N->isVolatile();
+  bool isNonTemporal = N->isNonTemporal();
   DebugLoc dl = N->getDebugLoc();
 
   assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1508,7 +1510,7 @@
     EVT MemVT = N->getMemoryVT();
 
     Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
-                        MemVT, isVolatile, Alignment);
+                        MemVT, isVolatile, isNonTemporal, Alignment);
 
     // Remember the chain.
     Ch = Lo.getValue(1);
@@ -1530,7 +1532,7 @@
   } else if (TLI.isLittleEndian()) {
     // Little-endian - low bits are at low addresses.
     Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
-                     isVolatile, Alignment);
+                     isVolatile, isNonTemporal, Alignment);
 
     unsigned ExcessBits =
       N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -1542,7 +1544,8 @@
                       DAG.getIntPtrConstant(IncrementSize));
     Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(),
                         SVOffset+IncrementSize, NEVT,
-                        isVolatile, MinAlign(Alignment, IncrementSize));
+                        isVolatile, isNonTemporal,
+                        MinAlign(Alignment, IncrementSize));
 
     // Build a factor node to remember that this load is independent of the
     // other one.
@@ -1560,7 +1563,7 @@
     Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           MemVT.getSizeInBits() - ExcessBits),
-                        isVolatile, Alignment);
+                        isVolatile, isNonTemporal, Alignment);
 
     // Increment the pointer to the other half.
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -1569,7 +1572,8 @@
     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(),
                         SVOffset+IncrementSize,
                         EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
-                        isVolatile, MinAlign(Alignment, IncrementSize));
+                        isVolatile, isNonTemporal,
+                        MinAlign(Alignment, IncrementSize));
 
     // Build a factor node to remember that this load is independent of the
     // other one.
@@ -2212,6 +2216,7 @@
   int SVOffset = N->getSrcValueOffset();
   unsigned Alignment = N->getAlignment();
   bool isVolatile = N->isVolatile();
+  bool isNonTemporal = N->isNonTemporal();
   DebugLoc dl = N->getDebugLoc();
   SDValue Lo, Hi;
 
@@ -2220,13 +2225,14 @@
   if (N->getMemoryVT().bitsLE(NVT)) {
     GetExpandedInteger(N->getValue(), Lo, Hi);
     return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
-                             N->getMemoryVT(), isVolatile, Alignment);
+                             N->getMemoryVT(), isVolatile, isNonTemporal,
+                             Alignment);
   } else if (TLI.isLittleEndian()) {
     // Little-endian - low bits are at low addresses.
     GetExpandedInteger(N->getValue(), Lo, Hi);
 
     Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
-                      isVolatile, Alignment);
+                      isVolatile, isNonTemporal, Alignment);
 
     unsigned ExcessBits =
       N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2238,7 +2244,8 @@
                       DAG.getIntPtrConstant(IncrementSize));
     Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
                            SVOffset+IncrementSize, NEVT,
-                           isVolatile, MinAlign(Alignment, IncrementSize));
+                           isVolatile, isNonTemporal,
+                           MinAlign(Alignment, IncrementSize));
     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
   } else {
     // Big-endian - high bits are at low addresses.  Favor aligned stores at
@@ -2264,7 +2271,8 @@
 
     // Store both the high bits and maybe some of the low bits.
     Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
-                           SVOffset, HiVT, isVolatile, Alignment);
+                           SVOffset, HiVT, isVolatile, isNonTemporal,
+                           Alignment);
 
     // Increment the pointer to the other half.
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -2273,7 +2281,8 @@
     Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
                            SVOffset+IncrementSize,
                            EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
-                           isVolatile, MinAlign(Alignment, IncrementSize));
+                           isVolatile, isNonTemporal,
+                           MinAlign(Alignment, IncrementSize));
     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
   }
 }
@@ -2341,7 +2350,7 @@
     // FIXME: Avoid the extend by constructing the right constant pool?
     SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
                                    FudgePtr, NULL, 0, MVT::f32,
-                                   false, Alignment);
+                                   false, false, Alignment);
     return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
   }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 37f36a3..f3e7ca4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -871,9 +871,10 @@
   // the source and destination types.
   SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
   // Emit a store to the stack slot.
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0,
+                               false, false, 0);
   // Result is a load from the stack slot.
-  return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0);
+  return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0);
 }
 
 /// CustomLowerNode - Replace the node's results with custom code provided
@@ -1033,8 +1034,7 @@
     TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
                     false, 0, TLI.getLibcallCallingConv(LC), false,
                     /*isReturnValueUsed=*/true,
-                    Callee, Args, DAG, dl,
-                    DAG.GetOrdering(DAG.getEntryNode().getNode()));
+                    Callee, Args, DAG, dl);
   return CallInfo.first;
 }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a1b6ced..5e83b4b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -122,10 +122,11 @@
   const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
 
   // Emit a store to the stack slot.
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0,
+                               false, false, 0);
 
   // Load the first half from the stack slot.
-  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0);
+  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0);
 
   // Increment the pointer to the other half.
   unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -134,7 +135,7 @@
 
   // Load the second half from the stack slot.
   Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
-                   MinAlign(Alignment, IncrementSize));
+                   false, MinAlign(Alignment, IncrementSize));
 
   // Handle endianness of the load.
   if (TLI.isBigEndian())
@@ -205,11 +206,12 @@
   int SVOffset = LD->getSrcValueOffset();
   unsigned Alignment = LD->getAlignment();
   bool isVolatile = LD->isVolatile();
+  bool isNonTemporal = LD->isNonTemporal();
 
   assert(NVT.isByteSized() && "Expanded type not byte sized!");
 
   Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
-                   isVolatile, Alignment);
+                   isVolatile, isNonTemporal, Alignment);
 
   // Increment the pointer to the other half.
   unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -217,7 +219,8 @@
                     DAG.getIntPtrConstant(IncrementSize));
   Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
                    SVOffset+IncrementSize,
-                   isVolatile, MinAlign(Alignment, IncrementSize));
+                   isVolatile, isNonTemporal,
+                   MinAlign(Alignment, IncrementSize));
 
   // Build a factor node to remember that this load is independent of the
   // other one.
@@ -383,6 +386,7 @@
   int SVOffset = St->getSrcValueOffset();
   unsigned Alignment = St->getAlignment();
   bool isVolatile = St->isVolatile();
+  bool isNonTemporal = St->isNonTemporal();
 
   assert(NVT.isByteSized() && "Expanded type not byte sized!");
   unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -394,14 +398,15 @@
     std::swap(Lo, Hi);
 
   Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
-                    isVolatile, Alignment);
+                    isVolatile, isNonTemporal, Alignment);
 
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                     DAG.getIntPtrConstant(IncrementSize));
   assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
   Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
                     SVOffset + IncrementSize,
-                    isVolatile, MinAlign(Alignment, IncrementSize));
+                    isVolatile, isNonTemporal,
+                    MinAlign(Alignment, IncrementSize));
 
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
 }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bf95bb5..8363c3a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -172,7 +172,8 @@
                                DAG.getUNDEF(N->getBasePtr().getValueType()),
                                N->getSrcValue(), N->getSrcValueOffset(),
                                N->getMemoryVT().getVectorElementType(),
-                               N->isVolatile(), N->getOriginalAlignment());
+                               N->isVolatile(), N->isNonTemporal(),
+                               N->getOriginalAlignment());
 
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
@@ -366,11 +367,13 @@
                              N->getBasePtr(),
                              N->getSrcValue(), N->getSrcValueOffset(),
                              N->getMemoryVT().getVectorElementType(),
-                             N->isVolatile(), N->getAlignment());
+                             N->isVolatile(), N->isNonTemporal(),
+                             N->getAlignment());
 
   return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
                       N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
-                      N->isVolatile(), N->getOriginalAlignment());
+                      N->isVolatile(), N->isNonTemporal(),
+                      N->getOriginalAlignment());
 }
 
 
@@ -696,17 +699,20 @@
   EVT VecVT = Vec.getValueType();
   EVT EltVT = VecVT.getVectorElementType();
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
+                               false, false, 0);
 
   // Store the new element.  This may be larger than the vector element type,
   // so use a truncating store.
   SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
   unsigned Alignment =
     TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext()));
-  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT);
+  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
+                            false, false, 0);
 
   // Load the Lo part from the stack slot.
-  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0);
+  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0,
+                   false, false, 0);
 
   // Increment the pointer to the other part.
   unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -715,7 +721,7 @@
 
   // Load the Hi part from the stack slot.
   Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
-                   MinAlign(Alignment, IncrementSize));
+                   false, MinAlign(Alignment, IncrementSize));
 }
 
 void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -743,19 +749,20 @@
   EVT MemoryVT = LD->getMemoryVT();
   unsigned Alignment = LD->getOriginalAlignment();
   bool isVolatile = LD->isVolatile();
+  bool isNonTemporal = LD->isNonTemporal();
 
   EVT LoMemVT, HiMemVT;
   GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
 
   Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset,
-                   SV, SVOffset, LoMemVT, isVolatile, Alignment);
+                   SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment);
 
   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                     DAG.getIntPtrConstant(IncrementSize));
   SVOffset += IncrementSize;
   Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset,
-                   SV, SVOffset, HiMemVT, isVolatile, Alignment);
+                   SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment);
 
   // Build a factor node to remember that this load is independent of the
   // other one.
@@ -1086,12 +1093,13 @@
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
   int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
   const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0,
+                               false, false, 0);
 
   // Load back the required element.
   StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
   return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
-                        SV, 0, EltVT);
+                        SV, 0, EltVT, false, false, 0);
 }
 
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1106,6 +1114,7 @@
   EVT MemoryVT = N->getMemoryVT();
   unsigned Alignment = N->getOriginalAlignment();
   bool isVol = N->isVolatile();
+  bool isNT = N->isNonTemporal();
   SDValue Lo, Hi;
   GetSplitVector(N->getOperand(1), Lo, Hi);
 
@@ -1116,10 +1125,10 @@
 
   if (isTruncating)
     Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
-                           LoMemVT, isVol, Alignment);
+                           LoMemVT, isVol, isNT, Alignment);
   else
     Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
-                      isVol, Alignment);
+                      isVol, isNT, Alignment);
 
   // Increment the pointer to the other half.
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -1128,10 +1137,10 @@
 
   if (isTruncating)
     Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
-                           HiMemVT, isVol, Alignment);
+                           HiMemVT, isVol, isNT, Alignment);
   else
     Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
-                      isVol, Alignment);
+                      isVol, isNT, Alignment);
 
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
 }
@@ -1242,10 +1251,96 @@
 
 SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
   // Binary op widening.
+  unsigned Opcode = N->getOpcode();
+  DebugLoc dl = N->getDebugLoc();
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
-  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
-  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
+  EVT WidenEltVT = WidenVT.getVectorElementType();
+  EVT VT = WidenVT;
+  unsigned NumElts =  VT.getVectorNumElements();
+  while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+     NumElts = NumElts / 2;
+     VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+  }
+
+  if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+    // Operation doesn't trap so just widen as normal.
+    SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+    SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+    return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+  } else if (NumElts == 1) {
+    // No legal vector version so unroll the vector operation and then widen.
+    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+  } else {
+    // Since the operation can trap, apply operation on the original vector.
+    SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+    SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+    unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+    SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+    unsigned ConcatEnd = 0;  // Current ConcatOps index.
+    unsigned Idx = 0;        // Current Idx into input vectors.
+    while (CurNumElts != 0) {
+      while (CurNumElts >= NumElts) {
+        SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+                                   DAG.getIntPtrConstant(Idx));
+        SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+                                   DAG.getIntPtrConstant(Idx));
+        ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+        Idx += NumElts;
+        CurNumElts -= NumElts;
+      }
+      EVT PrevVecVT = VT;
+      do {
+        NumElts = NumElts / 2;
+        VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+      } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+      if (NumElts == 1) {
+        // Since we are using concat vector, build a vector from the scalar ops.
+        SDValue VecOp = DAG.getUNDEF(PrevVecVT);
+        for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+          SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, 
+                                     InOp1, DAG.getIntPtrConstant(Idx));
+          SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, 
+                                     InOp2, DAG.getIntPtrConstant(Idx));
+          VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp,
+                              DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2),
+                              DAG.getIntPtrConstant(i));
+        }
+        CurNumElts = 0;
+        ConcatOps[ConcatEnd++] = VecOp;
+      }
+    }
+
+    // Check to see if we have a single operation with the widen type.
+    if (ConcatEnd == 1) {
+      VT = ConcatOps[0].getValueType();
+      if (VT == WidenVT)
+        return ConcatOps[0];
+    }
+
+    // Rebuild vector to one with the widen type
+    Idx = ConcatEnd - 1;
+    while (Idx != 0) {
+      VT = ConcatOps[Idx--].getValueType();
+      while (Idx != 0 && ConcatOps[Idx].getValueType() == VT)
+        --Idx;
+      if (Idx != 0) {
+        VT = ConcatOps[Idx].getValueType();
+        ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+                                     &ConcatOps[Idx+1], ConcatEnd - Idx - 1);
+        ConcatEnd = Idx + 2;
+      }
+    }
+    
+    unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements();
+    if (NumOps != ConcatEnd ) {
+      SDValue UndefVal = DAG.getUNDEF(VT);
+      for (unsigned j = ConcatEnd; j < NumOps; ++j)
+        ConcatOps[j] = UndefVal;
+    }
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+  }
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
@@ -2042,6 +2137,7 @@
   int       SVOffset = LD->getSrcValueOffset();
   unsigned  Align    = LD->getAlignment();
   bool      isVolatile = LD->isVolatile();
+  bool      isNonTemporal = LD->isNonTemporal();
   const Value *SV = LD->getSrcValue();
 
   int LdWidth = LdVT.getSizeInBits();
@@ -2052,7 +2148,7 @@
   EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
   int NewVTWidth = NewVT.getSizeInBits();
   SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
-                             isVolatile, Align);
+                             isVolatile, isNonTemporal, Align);
   LdChain.push_back(LdOp.getValue(1));
 
   // Check if we can load the element with one instruction
@@ -2099,7 +2195,7 @@
 
     SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
                                SVOffset+Offset, isVolatile,
-                               MinAlign(Align, Increment));
+                               isNonTemporal, MinAlign(Align, Increment));
     LdChain.push_back(LdOp.getValue(1));
     LdOps.push_back(LdOp);
 
@@ -2173,6 +2269,7 @@
   int       SVOffset = LD->getSrcValueOffset();
   unsigned  Align    = LD->getAlignment();
   bool      isVolatile = LD->isVolatile();
+  bool      isNonTemporal = LD->isNonTemporal();
   const Value *SV = LD->getSrcValue();
 
   EVT EltVT = WidenVT.getVectorElementType();
@@ -2184,14 +2281,15 @@
   SmallVector<SDValue, 16> Ops(WidenNumElts);
   unsigned Increment = LdEltVT.getSizeInBits() / 8;
   Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
-                          LdEltVT, isVolatile, Align);
+                          LdEltVT, isVolatile, isNonTemporal, Align);
   LdChain.push_back(Ops[0].getValue(1));
   unsigned i = 0, Offset = Increment;
   for (i=1; i < NumElts; ++i, Offset += Increment) {
     SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
                                      BasePtr, DAG.getIntPtrConstant(Offset));
     Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
-                            SVOffset + Offset, LdEltVT, isVolatile, Align);
+                            SVOffset + Offset, LdEltVT, isVolatile,
+                            isNonTemporal, Align);
     LdChain.push_back(Ops[i].getValue(1));
   }
 
@@ -2215,6 +2313,7 @@
   int      SVOffset = ST->getSrcValueOffset();
   unsigned Align = ST->getAlignment();
   bool     isVolatile = ST->isVolatile();
+  bool     isNonTemporal = ST->isNonTemporal();
   SDValue  ValOp = GetWidenedVector(ST->getValue());
   DebugLoc dl = ST->getDebugLoc();
 
@@ -2240,6 +2339,7 @@
                                    DAG.getIntPtrConstant(Idx));
         StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
                                        SVOffset + Offset, isVolatile,
+                                       isNonTemporal,
                                        MinAlign(Align, Offset)));
         StWidth -= NewVTWidth;
         Offset += Increment;
@@ -2258,8 +2358,8 @@
         SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
                       DAG.getIntPtrConstant(Idx++));
         StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
-                                   SVOffset + Offset, isVolatile,
-                                   MinAlign(Align, Offset)));
+                                       SVOffset + Offset, isVolatile,
+                                       isNonTemporal, MinAlign(Align, Offset)));
         StWidth -= NewVTWidth;
         Offset += Increment;
         BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
@@ -2282,6 +2382,7 @@
   int      SVOffset = ST->getSrcValueOffset();
   unsigned Align = ST->getAlignment();
   bool     isVolatile = ST->isVolatile();
+  bool     isNonTemporal = ST->isNonTemporal();
   SDValue  ValOp = GetWidenedVector(ST->getValue());
   DebugLoc dl = ST->getDebugLoc();
   
@@ -2304,7 +2405,7 @@
                             DAG.getIntPtrConstant(0));
   StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
                                       SVOffset, StEltVT,
-                                      isVolatile, Align));
+                                      isVolatile, isNonTemporal, Align));
   unsigned Offset = Increment;
   for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
     SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
@@ -2313,7 +2414,8 @@
                             DAG.getIntPtrConstant(0));
     StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
                                         SVOffset + Offset, StEltVT,
-                                        isVolatile, MinAlign(Align, Offset)));
+                                        isVolatile, isNonTemporal,
+                                        MinAlign(Align, Offset)));
   }
 }
 
diff --git a/lib/CodeGen/SelectionDAG/SDDbgValue.h b/lib/CodeGen/SelectionDAG/SDDbgValue.h
new file mode 100644
index 0000000..9e15fc9
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SDDbgValue.h
@@ -0,0 +1,67 @@
+//===-- llvm/CodeGen/SDDbgValue.h - SD dbg_value handling--------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDDBGVALUE_H
+#define LLVM_CODEGEN_SDDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// Either Const or Node is nonzero, but not both.
+/// We do not use SDValue here to avoid including its header.
+
+class SDDbgValue {
+  SDNode *Node;           // valid for non-constants
+  unsigned ResNo;         // valid for non-constants
+  Value *Const;           // valid for constants
+  MDNode *mdPtr;
+  uint64_t Offset;
+  DebugLoc DL;
+public:
+  // Constructor for non-constants.
+  SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl) :
+    Node(N), ResNo(R), Const(0), mdPtr(mdP), Offset(off), DL(dl) {}
+
+  // Constructor for constants.
+  SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl) : Node(0),
+    ResNo(0), Const(C), mdPtr(mdP), Offset(off), DL(dl) {}
+
+  // Returns the MDNode pointer.
+  MDNode *getMDPtr() { return mdPtr; }
+
+  // Returns the SDNode* (valid for non-constants only).
+  SDNode *getSDNode() { assert (!Const); return Node; }
+
+  // Returns the ResNo (valid for non-constants only).
+  unsigned getResNo() { assert (!Const); return ResNo; }
+
+  // Returns the Value* for a constant (invalid for non-constants).
+  Value *getConst() { assert (!Node); return Const; }
+
+  // Returns the offset.
+  uint64_t getOffset() { return Offset; }
+
+  // Returns the DebugLoc.
+  DebugLoc getDebugLoc() { return DL; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b51c61b..06e7b8c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -218,8 +218,20 @@
   // Check to see if the scheduler cares about latencies.
   bool UnitLatencies = ForceUnitLatencies();
 
-  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
-       E = DAG->allnodes_end(); NI != E; ++NI) {
+  // Add all nodes in depth first order.
+  SmallVector<SDNode*, 64> Worklist;
+  SmallPtrSet<SDNode*, 64> Visited;
+  Worklist.push_back(DAG->getRoot().getNode());
+  Visited.insert(DAG->getRoot().getNode());
+  
+  while (!Worklist.empty()) {
+    SDNode *NI = Worklist.pop_back_val();
+    
+    // Add all operands to the worklist unless they've already been added.
+    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+      if (Visited.insert(NI->getOperand(i).getNode()))
+        Worklist.push_back(NI->getOperand(i).getNode());
+  
     if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
       continue;
     
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6122a2a..746d4e2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -468,18 +468,20 @@
 }
 
 /// encodeMemSDNodeFlags - Generic routine for computing a value for use in
-/// the CSE map that carries volatility, indexing mode, and
+/// the CSE map that carries volatility, temporalness, indexing mode, and
 /// extension/truncation information.
 ///
 static inline unsigned
-encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) {
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+                     bool isNonTemporal) {
   assert((ConvType & 3) == ConvType &&
          "ConvType may not require more than 2 bits!");
   assert((AM & 7) == AM &&
          "AM may not require more than 3 bits!");
   return ConvType |
          (AM << 2) |
-         (isVolatile << 5);
+         (isVolatile << 5) |
+         (isNonTemporal << 6);
 }
 
 //===----------------------------------------------------------------------===//
@@ -829,6 +831,7 @@
   EntryNode.UseList = 0;
   AllNodes.push_back(&EntryNode);
   Root = getEntryNode();
+  delete Ordering;
   Ordering = new SDNodeOrdering();
 }
 
@@ -859,14 +862,14 @@
 /// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
 ///
 SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
-  EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+  EVT EltVT = VT.getScalarType();
   SDValue NegOne =
     getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
   return getNode(ISD::XOR, DL, VT, Val, NegOne);
 }
 
 SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
-  EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+  EVT EltVT = VT.getScalarType();
   assert((EltVT.getSizeInBits() >= 64 ||
          (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
          "getConstant with a uint64_t value that doesn't fit in the type!");
@@ -880,7 +883,7 @@
 SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
   assert(VT.isInteger() && "Cannot create FP integer constant!");
 
-  EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+  EVT EltVT = VT.getScalarType();
   assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
          "APInt size does not match type size!");
 
@@ -923,8 +926,7 @@
 SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
   assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
 
-  EVT EltVT =
-    VT.isVector() ? VT.getVectorElementType() : VT;
+  EVT EltVT = VT.getScalarType();
 
   // Do the map lookup using the actual bit pattern for the floating point
   // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
@@ -958,8 +960,7 @@
 }
 
 SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
-  EVT EltVT =
-    VT.isVector() ? VT.getVectorElementType() : VT;
+  EVT EltVT = VT.getScalarType();
   if (EltVT==MVT::f32)
     return getConstantFP(APFloat((float)Val), VT, isTarget);
   else
@@ -1344,7 +1345,7 @@
 }
 
 SDValue SelectionDAG::getSrcValue(const Value *V) {
-  assert((!V || isa<PointerType>(V->getType())) &&
+  assert((!V || V->getType()->isPointerTy()) &&
          "SrcValue is not a pointer?");
 
   FoldingSetNodeID ID;
@@ -2232,6 +2233,29 @@
   return false;
 }
 
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+  // If the value is a constant, we can obviously see if it is a zero or not.
+  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+    return !C->isZero();
+
+  // TODO: Recognize more cases here.
+
+  return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+  // Check the obvious case.
+  if (A == B) return true;
+
+  // For for negative and positive zero.
+  if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+    if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+      if (CA->isZero() && CB->isZero()) return true;
+
+  // Otherwise they may not be equal.
+  return false;
+}
+
 bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
   GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
   if (!GA) return false;
@@ -3080,8 +3104,7 @@
 /// operand.
 static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
                               DebugLoc dl) {
-  unsigned NumBits = VT.isVector() ?
-    VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
+  unsigned NumBits = VT.getScalarType().getSizeInBits();
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
     APInt Val = APInt(NumBits, C->getZExtValue() & 255);
     unsigned Shift = 8;
@@ -3185,7 +3208,7 @@
   bool isSrcConst = isa<ConstantSDNode>(Src);
   EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
   bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
-  if (VT != MVT::iAny) {
+  if (VT != MVT::Other) {
     const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
     unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
     // If source is a string constant, this will require an unaligned load.
@@ -3193,14 +3216,14 @@
       if (Dst.getOpcode() != ISD::FrameIndex) {
         // Can't change destination alignment. It requires a unaligned store.
         if (AllowUnalign)
-          VT = MVT::iAny;
+          VT = MVT::Other;
       } else {
         int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
         MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
         if (MFI->isFixedObjectIndex(FI)) {
           // Can't change destination alignment. It requires a unaligned store.
           if (AllowUnalign)
-            VT = MVT::iAny;
+            VT = MVT::Other;
         } else {
           // Give the stack frame object a larger alignment if needed.
           if (MFI->getObjectAlignment(FI) < NewAlign)
@@ -3211,7 +3234,7 @@
     }
   }
 
-  if (VT == MVT::iAny) {
+  if (VT == MVT::Other) {
     if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
       VT = MVT::i64;
     } else {
@@ -3299,7 +3322,7 @@
       Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
       Store = DAG.getStore(Chain, dl, Value,
                            getMemBasePlusOffset(Dst, DstOff, DAG),
-                           DstSV, DstSVOff + DstOff, false, DstAlign);
+                           DstSV, DstSVOff + DstOff, false, false, DstAlign);
     } else {
       // The type might not be legal for the target.  This should only happen
       // if the type is smaller than a legal type, as on PPC, so the right
@@ -3310,10 +3333,11 @@
       assert(NVT.bitsGE(VT));
       Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
                              getMemBasePlusOffset(Src, SrcOff, DAG),
-                             SrcSV, SrcSVOff + SrcOff, VT, false, Align);
+                             SrcSV, SrcSVOff + SrcOff, VT, false, false, Align);
       Store = DAG.getTruncStore(Chain, dl, Value,
-                             getMemBasePlusOffset(Dst, DstOff, DAG),
-                             DstSV, DstSVOff + DstOff, VT, false, DstAlign);
+                                getMemBasePlusOffset(Dst, DstOff, DAG),
+                                DstSV, DstSVOff + DstOff, VT, false, false,
+                                DstAlign);
     }
     OutChains.push_back(Store);
     SrcOff += VTSize;
@@ -3358,7 +3382,7 @@
 
     Value = DAG.getLoad(VT, dl, Chain,
                         getMemBasePlusOffset(Src, SrcOff, DAG),
-                        SrcSV, SrcSVOff + SrcOff, false, Align);
+                        SrcSV, SrcSVOff + SrcOff, false, false, Align);
     LoadValues.push_back(Value);
     LoadChains.push_back(Value.getValue(1));
     SrcOff += VTSize;
@@ -3373,7 +3397,7 @@
 
     Store = DAG.getStore(Chain, dl, LoadValues[i],
                          getMemBasePlusOffset(Dst, DstOff, DAG),
-                         DstSV, DstSVOff + DstOff, false, DstAlign);
+                         DstSV, DstSVOff + DstOff, false, false, DstAlign);
     OutChains.push_back(Store);
     DstOff += VTSize;
   }
@@ -3408,7 +3432,7 @@
     SDValue Value = getMemsetValue(Src, VT, DAG, dl);
     SDValue Store = DAG.getStore(Chain, dl, Value,
                                  getMemBasePlusOffset(Dst, DstOff, DAG),
-                                 DstSV, DstSVOff + DstOff);
+                                 DstSV, DstSVOff + DstOff, false, false, 0);
     OutChains.push_back(Store);
     DstOff += VTSize;
   }
@@ -3472,7 +3496,7 @@
                     /*isReturnValueUsed=*/false,
                     getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
                                       TLI.getPointerTy()),
-                    Args, *this, dl, GetOrdering(Chain.getNode()));
+                    Args, *this, dl);
   return CallResult.second;
 }
 
@@ -3521,7 +3545,7 @@
                     /*isReturnValueUsed=*/false,
                     getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
                                       TLI.getPointerTy()),
-                    Args, *this, dl, GetOrdering(Chain.getNode()));
+                    Args, *this, dl);
   return CallResult.second;
 }
 
@@ -3580,7 +3604,7 @@
                     /*isReturnValueUsed=*/false,
                     getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
                                       TLI.getPointerTy()),
-                    Args, *this, dl, GetOrdering(Chain.getNode()));
+                    Args, *this, dl);
   return CallResult.second;
 }
 
@@ -3788,7 +3812,8 @@
                       ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
                       SDValue Ptr, SDValue Offset,
                       const Value *SV, int SVOffset, EVT MemVT,
-                      bool isVolatile, unsigned Alignment) {
+                      bool isVolatile, bool isNonTemporal,
+                      unsigned Alignment) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(VT);
 
@@ -3802,6 +3827,8 @@
   unsigned Flags = MachineMemOperand::MOLoad;
   if (isVolatile)
     Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(SV, Flags, SVOffset,
                             MemVT.getStoreSize(), Alignment);
@@ -3840,7 +3867,8 @@
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
   ID.AddInteger(MemVT.getRawBits());
-  ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile()));
+  ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+                                     MMO->isNonTemporal()));
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -3856,20 +3884,22 @@
 SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
                               SDValue Chain, SDValue Ptr,
                               const Value *SV, int SVOffset,
-                              bool isVolatile, unsigned Alignment) {
+                              bool isVolatile, bool isNonTemporal,
+                              unsigned Alignment) {
   SDValue Undef = getUNDEF(Ptr.getValueType());
   return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef,
-                 SV, SVOffset, VT, isVolatile, Alignment);
+                 SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
 }
 
 SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
                                  SDValue Chain, SDValue Ptr,
                                  const Value *SV,
                                  int SVOffset, EVT MemVT,
-                                 bool isVolatile, unsigned Alignment) {
+                                 bool isVolatile, bool isNonTemporal,
+                                 unsigned Alignment) {
   SDValue Undef = getUNDEF(Ptr.getValueType());
   return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
-                 SV, SVOffset, MemVT, isVolatile, Alignment);
+                 SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
 }
 
 SDValue
@@ -3881,12 +3911,13 @@
   return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(),
                  LD->getChain(), Base, Offset, LD->getSrcValue(),
                  LD->getSrcValueOffset(), LD->getMemoryVT(),
-                 LD->isVolatile(), LD->getAlignment());
+                 LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
 }
 
 SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                SDValue Ptr, const Value *SV, int SVOffset,
-                               bool isVolatile, unsigned Alignment) {
+                               bool isVolatile, bool isNonTemporal,
+                               unsigned Alignment) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(Val.getValueType());
 
@@ -3900,6 +3931,8 @@
   unsigned Flags = MachineMemOperand::MOStore;
   if (isVolatile)
     Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(SV, Flags, SVOffset,
                             Val.getValueType().getStoreSize(), Alignment);
@@ -3916,7 +3949,8 @@
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
   ID.AddInteger(VT.getRawBits());
-  ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile()));
+  ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+                                     MMO->isNonTemporal()));
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -3932,7 +3966,8 @@
 SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                     SDValue Ptr, const Value *SV,
                                     int SVOffset, EVT SVT,
-                                    bool isVolatile, unsigned Alignment) {
+                                    bool isVolatile, bool isNonTemporal,
+                                    unsigned Alignment) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(SVT);
 
@@ -3946,6 +3981,8 @@
   unsigned Flags = MachineMemOperand::MOStore;
   if (isVolatile)
     Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
 
@@ -3976,7 +4013,8 @@
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
   ID.AddInteger(SVT.getRawBits());
-  ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile()));
+  ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+                                     MMO->isNonTemporal()));
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4535,91 +4573,13 @@
 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                    SDVTList VTs, const SDValue *Ops,
                                    unsigned NumOps) {
-  return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+  N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+  // Reset the NodeID to -1.
+  N->setNodeId(-1);
+  return N;
 }
 
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT) {
-  SDVTList VTs = getVTList(VT);
-  return MorphNodeTo(N, Opc, VTs, 0, 0);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT, SDValue Op1) {
-  SDVTList VTs = getVTList(VT);
-  SDValue Ops[] = { Op1 };
-  return MorphNodeTo(N, Opc, VTs, Ops, 1);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT, SDValue Op1,
-                                  SDValue Op2) {
-  SDVTList VTs = getVTList(VT);
-  SDValue Ops[] = { Op1, Op2 };
-  return MorphNodeTo(N, Opc, VTs, Ops, 2);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT, SDValue Op1,
-                                  SDValue Op2, SDValue Op3) {
-  SDVTList VTs = getVTList(VT);
-  SDValue Ops[] = { Op1, Op2, Op3 };
-  return MorphNodeTo(N, Opc, VTs, Ops, 3);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT, const SDValue *Ops,
-                                  unsigned NumOps) {
-  SDVTList VTs = getVTList(VT);
-  return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT1, EVT VT2, const SDValue *Ops,
-                                  unsigned NumOps) {
-  SDVTList VTs = getVTList(VT1, VT2);
-  return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT1, EVT VT2) {
-  SDVTList VTs = getVTList(VT1, VT2);
-  return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT1, EVT VT2, EVT VT3,
-                                  const SDValue *Ops, unsigned NumOps) {
-  SDVTList VTs = getVTList(VT1, VT2, VT3);
-  return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT1, EVT VT2,
-                                  SDValue Op1) {
-  SDVTList VTs = getVTList(VT1, VT2);
-  SDValue Ops[] = { Op1 };
-  return MorphNodeTo(N, Opc, VTs, Ops, 1);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT1, EVT VT2,
-                                  SDValue Op1, SDValue Op2) {
-  SDVTList VTs = getVTList(VT1, VT2);
-  SDValue Ops[] = { Op1, Op2 };
-  return MorphNodeTo(N, Opc, VTs, Ops, 2);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
-                                  EVT VT1, EVT VT2,
-                                  SDValue Op1, SDValue Op2,
-                                  SDValue Op3) {
-  SDVTList VTs = getVTList(VT1, VT2);
-  SDValue Ops[] = { Op1, Op2, Op3 };
-  return MorphNodeTo(N, Opc, VTs, Ops, 3);
-}
-
-/// MorphNodeTo - These *mutate* the specified node to have the specified
+/// MorphNodeTo - This *mutates* the specified node to have the specified
 /// return type, opcode, and operands.
 ///
 /// Note that MorphNodeTo returns the resultant node.  If there is already a
@@ -4695,12 +4655,14 @@
 
   // Delete any nodes that are still dead after adding the uses for the
   // new operands.
-  SmallVector<SDNode *, 16> DeadNodes;
-  for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
-       E = DeadNodeSet.end(); I != E; ++I)
-    if ((*I)->use_empty())
-      DeadNodes.push_back(*I);
-  RemoveDeadNodes(DeadNodes);
+  if (!DeadNodeSet.empty()) {
+    SmallVector<SDNode *, 16> DeadNodes;
+    for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+         E = DeadNodeSet.end(); I != E; ++I)
+      if ((*I)->use_empty())
+        DeadNodes.push_back(*I);
+    RemoveDeadNodes(DeadNodes);
+  }
 
   if (IP)
     CSEMap.InsertNode(N, IP);   // Memoize the new node.
@@ -4907,6 +4869,43 @@
   return NULL;
 }
 
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+/// This class also manages a "downlink" DAGUpdateListener, to forward
+/// messages to ReplaceAllUsesWith's callers.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+  SelectionDAG::DAGUpdateListener *DownLink;
+  SDNode::use_iterator &UI;
+  SDNode::use_iterator &UE;
+
+  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    // Increment the iterator as needed.
+    while (UI != UE && N == *UI)
+      ++UI;
+
+    // Then forward the message.
+    if (DownLink) DownLink->NodeDeleted(N, E);
+  }
+
+  virtual void NodeUpdated(SDNode *N) {
+    // Just forward the message.
+    if (DownLink) DownLink->NodeUpdated(N);
+  }
+
+public:
+  RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl,
+                     SDNode::use_iterator &ui,
+                     SDNode::use_iterator &ue)
+    : DownLink(dl), UI(ui), UE(ue) {}
+};
+
+}
+
 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
 /// This can cause recursive merging of nodes in the DAG.
 ///
@@ -4927,6 +4926,7 @@
   // is replaced by To, we don't want to replace of all its users with To
   // too. See PR3018 for more info.
   SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
   while (UI != UE) {
     SDNode *User = *UI;
 
@@ -4945,7 +4945,7 @@
 
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
-    AddModifiedNodeToCSEMaps(User, UpdateListener);
+    AddModifiedNodeToCSEMaps(User, &Listener);
   }
 }
 
@@ -4971,6 +4971,7 @@
   // Iterate over just the existing users of From. See the comments in
   // the ReplaceAllUsesWith above.
   SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
   while (UI != UE) {
     SDNode *User = *UI;
 
@@ -4989,7 +4990,7 @@
 
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
-    AddModifiedNodeToCSEMaps(User, UpdateListener);
+    AddModifiedNodeToCSEMaps(User, &Listener);
   }
 }
 
@@ -5007,6 +5008,7 @@
   // Iterate over just the existing users of From. See the comments in
   // the ReplaceAllUsesWith above.
   SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
   while (UI != UE) {
     SDNode *User = *UI;
 
@@ -5026,7 +5028,7 @@
 
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
-    AddModifiedNodeToCSEMaps(User, UpdateListener);
+    AddModifiedNodeToCSEMaps(User, &Listener);
   }
 }
 
@@ -5048,6 +5050,7 @@
   // the ReplaceAllUsesWith above.
   SDNode::use_iterator UI = From.getNode()->use_begin(),
                        UE = From.getNode()->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
   while (UI != UE) {
     SDNode *User = *UI;
     bool UserRemovedFromCSEMaps = false;
@@ -5083,7 +5086,7 @@
 
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
-    AddModifiedNodeToCSEMaps(User, UpdateListener);
+    AddModifiedNodeToCSEMaps(User, &Listener);
   }
 }
 
@@ -5280,8 +5283,11 @@
 MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
                      MachineMemOperand *mmo)
  : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
-  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+                                      MMO->isNonTemporal());
   assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+  assert(isNonTemporal() == MMO->isNonTemporal() &&
+         "Non-temporal encoding error!");
   assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
 }
 
@@ -5290,7 +5296,8 @@
                      MachineMemOperand *mmo)
    : SDNode(Opc, dl, VTs, Ops, NumOps),
      MemoryVT(memvt), MMO(mmo) {
-  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+                                      MMO->isNonTemporal());
   assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
   assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
 }
@@ -5459,15 +5466,15 @@
         if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
           if (getMachineOpcode() < TII->getNumOpcodes())
             return TII->get(getMachineOpcode()).getName();
-      return "<<Unknown Machine Node>>";
+      return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
     }
     if (G) {
       const TargetLowering &TLI = G->getTargetLoweringInfo();
       const char *Name = TLI.getTargetNodeName(getOpcode());
       if (Name) return Name;
-      return "<<Unknown Target Node>>";
+      return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
     }
-    return "<<Unknown Node>>";
+    return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
 
 #ifndef NDEBUG
   case ISD::DELETED_NODE:
@@ -5904,6 +5911,9 @@
   if (G)
     if (unsigned Order = G->GetOrdering(this))
       OS << " [ORD=" << Order << ']';
+  
+  if (getNodeId() != -1)
+    OS << " [ID=" << getNodeId() << ']';
 }
 
 void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
@@ -6292,31 +6302,37 @@
   return true;
 }
 
+#ifdef XDEBUG
 static void checkForCyclesHelper(const SDNode *N,
-                                 std::set<const SDNode *> &visited) {
-  if (visited.find(N) != visited.end()) {
+                                 SmallPtrSet<const SDNode*, 32> &Visited,
+                                 SmallPtrSet<const SDNode*, 32> &Checked) {
+  // If this node has already been checked, don't check it again.
+  if (Checked.count(N))
+    return;
+  
+  // If a node has already been visited on this depth-first walk, reject it as
+  // a cycle.
+  if (!Visited.insert(N)) {
     dbgs() << "Offending node:\n";
     N->dumprFull();
-    assert(0 && "Detected cycle in SelectionDAG");
+    errs() << "Detected cycle in SelectionDAG\n";
+    abort();
   }
-
-  std::set<const SDNode*>::iterator i;
-  bool inserted;
-
-  tie(i, inserted) = visited.insert(N);
-  assert(inserted && "Missed cycle");
-
-  for(unsigned i = 0; i < N->getNumOperands(); ++i) {
-    checkForCyclesHelper(N->getOperand(i).getNode(), visited);
-  }
-  visited.erase(i);
+  
+  for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+  
+  Checked.insert(N);
+  Visited.erase(N);
 }
+#endif
 
 void llvm::checkForCycles(const llvm::SDNode *N) {
 #ifdef XDEBUG
   assert(N && "Checking nonexistant SDNode");
-  std::set<const SDNode *> visited;
-  checkForCyclesHelper(N, visited);
+  SmallPtrSet<const SDNode*, 32> visited;
+  SmallPtrSet<const SDNode*, 32> checked;
+  checkForCyclesHelper(N, visited, checked);
 #endif
 }
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index de17f90..05be9a1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -155,7 +155,7 @@
     /// this value and returns the result as a ValueVTs value.  This uses
     /// Chain/Flag as the input and updates them for the output Chain/Flag.
     /// If the Flag pointer is NULL, no flag is used.
-    SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+    SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
                             SDValue &Chain, SDValue *Flag) const;
 
     /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
@@ -163,14 +163,14 @@
     /// Chain/Flag as the input and updates them for the output Chain/Flag.
     /// If the Flag pointer is NULL, no flag is used.
     void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                       unsigned Order, SDValue &Chain, SDValue *Flag) const;
+                       SDValue &Chain, SDValue *Flag) const;
 
     /// AddInlineAsmOperands - Add this value to the specified inlineasm node
     /// operand list.  This adds the code marker, matching input operand index
     /// (if applicable), and includes the number of values added into it.
     void AddInlineAsmOperands(unsigned Code,
                               bool HasMatching, unsigned MatchingIdx,
-                              SelectionDAG &DAG, unsigned Order,
+                              SelectionDAG &DAG,
                               std::vector<SDValue> &Ops) const;
   };
 }
@@ -180,7 +180,7 @@
 /// larger then ValueVT then AssertOp can be used to specify whether the extra
 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
 /// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
                                 const SDValue *Parts,
                                 unsigned NumParts, EVT PartVT, EVT ValueVT,
                                 ISD::NodeType AssertOp = ISD::DELETED_NODE) {
@@ -205,9 +205,9 @@
       EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
 
       if (RoundParts > 2) {
-        Lo = getCopyFromParts(DAG, dl, Order, Parts, RoundParts / 2,
+        Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2,
                               PartVT, HalfVT);
-        Hi = getCopyFromParts(DAG, dl, Order, Parts + RoundParts / 2,
+        Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2,
                               RoundParts / 2, PartVT, HalfVT);
       } else {
         Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
@@ -223,7 +223,7 @@
         // Assemble the trailing non-power-of-2 part.
         unsigned OddParts = NumParts - RoundParts;
         EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
-        Hi = getCopyFromParts(DAG, dl, Order,
+        Hi = getCopyFromParts(DAG, dl,
                               Parts + RoundParts, OddParts, PartVT, OddVT);
 
         // Combine the round and odd parts.
@@ -259,7 +259,7 @@
         // If the register was not expanded, truncate or copy the value,
         // as appropriate.
         for (unsigned i = 0; i != NumParts; ++i)
-          Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i], 1,
+          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
                                     PartVT, IntermediateVT);
       } else if (NumParts > 0) {
         // If the intermediate type was expanded, build the intermediate
@@ -268,7 +268,7 @@
                "Must expand into a divisible number of parts!");
         unsigned Factor = NumParts / NumIntermediates;
         for (unsigned i = 0; i != NumIntermediates; ++i)
-          Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i * Factor], Factor,
+          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
                                     PartVT, IntermediateVT);
       }
 
@@ -292,7 +292,7 @@
       assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
              !PartVT.isVector() && "Unexpected split");
       EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
-      Val = getCopyFromParts(DAG, dl, Order, Parts, NumParts, PartVT, IntVT);
+      Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
     }
   }
 
@@ -349,7 +349,7 @@
 /// getCopyToParts - Create a series of nodes that contain the specified value
 /// split into legal parts.  If the parts contain more bits than Val, then, for
 /// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
                            SDValue Val, SDValue *Parts, unsigned NumParts,
                            EVT PartVT,
                            ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
@@ -417,7 +417,7 @@
       SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
                                    DAG.getConstant(RoundBits,
                                                    TLI.getPointerTy()));
-      getCopyToParts(DAG, dl, Order, OddVal, Parts + RoundParts,
+      getCopyToParts(DAG, dl, OddVal, Parts + RoundParts,
                      OddParts, PartVT);
 
       if (TLI.isBigEndian())
@@ -514,7 +514,7 @@
     // If the register was not expanded, promote or copy the value,
     // as appropriate.
     for (unsigned i = 0; i != NumParts; ++i)
-      getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i], 1, PartVT);
+      getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
   } else if (NumParts > 0) {
     // If the intermediate type was expanded, split each the value into
     // legal parts.
@@ -522,7 +522,7 @@
            "Must expand into a divisible number of parts!");
     unsigned Factor = NumParts / NumIntermediates;
     for (unsigned i = 0; i != NumIntermediates; ++i)
-      getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i*Factor], Factor, PartVT);
+      getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT);
   }
 }
 
@@ -680,7 +680,7 @@
                                 getCurDebugLoc());
     }
 
-    if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
+    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
       assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
              "Unknown struct or array constant!");
 
@@ -747,8 +747,7 @@
 
   RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
   SDValue Chain = DAG.getEntryNode();
-  return RFV.getCopyFromRegs(DAG, getCurDebugLoc(),
-                             SDNodeOrder, Chain, NULL);
+  return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
 }
 
 /// Get the EVTs and ArgFlags collections that represent the legalized return 
@@ -844,19 +843,17 @@
       Chains[i] =
         DAG.getStore(Chain, getCurDebugLoc(),
                      SDValue(RetOp.getNode(), RetOp.getResNo() + i),
-                     Add, NULL, Offsets[i], false, 0);
+                     Add, NULL, Offsets[i], false, false, 0);
     }
 
     Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
                         MVT::Other, &Chains[0], NumValues);
-  } else {
-    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
-      SmallVector<EVT, 4> ValueVTs;
-      ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
-      unsigned NumValues = ValueVTs.size();
-      if (NumValues == 0) continue;
-
-      SDValue RetOp = getValue(I.getOperand(i));
+  } else if (I.getNumOperands() != 0) {
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
+    unsigned NumValues = ValueVTs.size();
+    if (NumValues) {
+      SDValue RetOp = getValue(I.getOperand(0));
       for (unsigned j = 0, f = NumValues; j != f; ++j) {
         EVT VT = ValueVTs[j];
 
@@ -881,7 +878,7 @@
         unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
         EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
         SmallVector<SDValue, 4> Parts(NumParts);
-        getCopyToParts(DAG, getCurDebugLoc(), SDNodeOrder,
+        getCopyToParts(DAG, getCurDebugLoc(),
                        SDValue(RetOp.getNode(), RetOp.getResNo() + j),
                        &Parts[0], NumParts, PartVT, ExtendKind);
 
@@ -1973,7 +1970,7 @@
   if (Cases.size() >= 2)
     // Must recompute end() each iteration because it may be
     // invalidated by erase if we hold on to it
-    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+    for (CaseItr TmpBegin = Cases.begin(), I = TmpBegin, J = ++TmpBegin; J != Cases.end(); ) {
       const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
       const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
       MachineBasicBlock* nextBB = J->BB;
@@ -2062,9 +2059,15 @@
 }
 
 void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
-  // Update machine-CFG edges.
+  // Update machine-CFG edges with unique successors.
+  SmallVector<BasicBlock*, 32> succs;
+  succs.reserve(I.getNumSuccessors());
   for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
-    CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
+    succs.push_back(I.getSuccessor(i));
+  array_pod_sort(succs.begin(), succs.end());
+  succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
+  for (unsigned i = 0, e = succs.size(); i != e; ++i)
+    CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
 
   DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
                           MVT::Other, getControlRoot(),
@@ -2074,7 +2077,7 @@
 void SelectionDAGBuilder::visitFSub(User &I) {
   // -0.0 - X --> fneg
   const Type *Ty = I.getType();
-  if (isa<VectorType>(Ty)) {
+  if (Ty->isVectorTy()) {
     if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
       const VectorType *DestTy = cast<VectorType>(I.getType());
       const Type *ElTy = DestTy->getElementType();
@@ -2111,7 +2114,7 @@
 void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
-  if (!isa<VectorType>(I.getType()) &&
+  if (!I.getType()->isVectorTy() &&
       Op2.getValueType() != TLI.getShiftAmountTy()) {
     // If the operand is smaller than the shift count type, promote it.
     EVT PTy = TLI.getPointerTy();
@@ -2699,7 +2702,9 @@
   SDValue Ptr = getValue(SV);
 
   const Type *Ty = I.getType();
+
   bool isVolatile = I.isVolatile();
+  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
   unsigned Alignment = I.getAlignment();
 
   SmallVector<EVT, 4> ValueVTs;
@@ -2731,7 +2736,8 @@
                             PtrVT, Ptr,
                             DAG.getConstant(Offsets[i], PtrVT));
     SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
-                            A, SV, Offsets[i], isVolatile, Alignment);
+                            A, SV, Offsets[i], isVolatile, 
+                            isNonTemporal, Alignment);
 
     Values[i] = L;
     Chains[i] = L.getValue(1);
@@ -2772,6 +2778,7 @@
   SmallVector<SDValue, 4> Chains(NumValues);
   EVT PtrVT = Ptr.getValueType();
   bool isVolatile = I.isVolatile();
+  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
   unsigned Alignment = I.getAlignment();
 
   for (unsigned i = 0; i != NumValues; ++i) {
@@ -2779,7 +2786,8 @@
                               DAG.getConstant(Offsets[i], PtrVT));
     Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
                              SDValue(Src.getNode(), Src.getResNo() + i),
-                             Add, PtrV, Offsets[i], isVolatile, Alignment);
+                             Add, PtrV, Offsets[i], isVolatile, 
+                             isNonTemporal, Alignment);
   }
 
   DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
@@ -2879,7 +2887,7 @@
 ///
 /// where Op is the hexidecimal representation of floating point value.
 static SDValue
-GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) {
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
   SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                            DAG.getConstant(0x007fffff, MVT::i32));
   SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
@@ -2894,7 +2902,7 @@
 /// where Op is the hexidecimal representation of floating point value.
 static SDValue
 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
-            DebugLoc dl, unsigned Order) {
+            DebugLoc dl) {
   SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                            DAG.getConstant(0x7f800000, MVT::i32));
   SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
@@ -3078,13 +3086,13 @@
     SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
 
     // Scale the exponent by log(2) [0.69314718f].
-    SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
     SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
                                         getF32Constant(DAG, 0x3f317218));
 
     // Get the significand and build it into a floating-point number with
     // exponent of 1.
-    SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+    SDValue X = GetSignificand(DAG, Op1, dl);
 
     if (LimitFloatPrecision <= 6) {
       // For floating-point precision of 6:
@@ -3188,11 +3196,11 @@
     SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
 
     // Get the exponent.
-    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
 
     // Get the significand and build it into a floating-point number with
     // exponent of 1.
-    SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+    SDValue X = GetSignificand(DAG, Op1, dl);
 
     // Different possible minimax approximations of significand in
     // floating-point for various degrees of accuracy over [1,2].
@@ -3297,13 +3305,13 @@
     SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
 
     // Scale the exponent by log10(2) [0.30102999f].
-    SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
     SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
                                         getF32Constant(DAG, 0x3e9a209a));
 
     // Get the significand and build it into a floating-point number with
     // exponent of 1.
-    SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+    SDValue X = GetSignificand(DAG, Op1, dl);
 
     if (LimitFloatPrecision <= 6) {
       // For floating-point precision of 6:
@@ -4058,7 +4066,7 @@
     // Store the stack protector onto the stack.
     Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
                        PseudoSourceValue::getFixedStack(FI),
-                       0, true);
+                       0, true, false, 0);
     setValue(&I, Res);
     DAG.setRoot(Res);
     return 0;
@@ -4276,8 +4284,8 @@
     // Check for a truly no-op bitcast.
     if (isa<BitCastInst>(U) &&
         (U->getOperand(0)->getType() == U->getType() ||
-         (isa<PointerType>(U->getOperand(0)->getType()) &&
-          isa<PointerType>(U->getType()))))
+         (U->getOperand(0)->getType()->isPointerTy() &&
+          U->getType()->isPointerTy())))
       continue;
     // Otherwise it's not a true no-op.
     return false;
@@ -4385,7 +4393,7 @@
                     CS.getCallingConv(),
                     isTailCall,
                     !CS.getInstruction()->use_empty(),
-                    Callee, Args, DAG, getCurDebugLoc(), SDNodeOrder);
+                    Callee, Args, DAG, getCurDebugLoc());
   assert((isTailCall || Result.second.getNode()) &&
          "Non-null chain expected with non-tail call!");
   assert((Result.second.getNode() || !Result.first.getNode()) &&
@@ -4410,7 +4418,7 @@
                                 DemoteStackSlot,
                                 DAG.getConstant(Offsets[i], PtrVT));
       SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
-                              Add, NULL, Offsets[i], false, 1);
+                              Add, NULL, Offsets[i], false, false, 1);
       Values[i] = L;
       Chains[i] = L.getValue(1);
     }
@@ -4433,7 +4441,7 @@
       unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
   
       SDValue ReturnValue =
-        getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder, &Values[CurReg], NumRegs,
+        getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
                          RegisterVT, VT, AssertOp);
       ReturnValues.push_back(ReturnValue);
       CurReg += NumRegs;
@@ -4512,7 +4520,8 @@
   SDValue Ptr = Builder.getValue(PtrVal);
   SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
                                         Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
-                                        false /*volatile*/, 1 /* align=1 */);
+                                        false /*volatile*/,
+                                        false /*nontemporal*/, 1 /* align=1 */);
 
   if (!ConstantMemory)
     Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -4529,9 +4538,9 @@
     return false;
 
   Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
-  if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) ||
-      !isa<IntegerType>(I.getOperand(3)->getType()) ||
-      !isa<IntegerType>(I.getType()))
+  if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
+      !I.getOperand(3)->getType()->isIntegerTy() ||
+      !I.getType()->isIntegerTy())
     return false;
 
   ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
@@ -4625,7 +4634,7 @@
       StringRef Name = F->getName();
       if (Name == "copysign" || Name == "copysignf") {
         if (I.getNumOperands() == 3 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getOperand(1)->getType()->isFloatingPointTy() &&
             I.getType() == I.getOperand(1)->getType() &&
             I.getType() == I.getOperand(2)->getType()) {
           SDValue LHS = getValue(I.getOperand(1));
@@ -4636,7 +4645,7 @@
         }
       } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
         if (I.getNumOperands() == 2 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getOperand(1)->getType()->isFloatingPointTy() &&
             I.getType() == I.getOperand(1)->getType()) {
           SDValue Tmp = getValue(I.getOperand(1));
           setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
@@ -4645,7 +4654,7 @@
         }
       } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
         if (I.getNumOperands() == 2 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getOperand(1)->getType()->isFloatingPointTy() &&
             I.getType() == I.getOperand(1)->getType() &&
             I.onlyReadsMemory()) {
           SDValue Tmp = getValue(I.getOperand(1));
@@ -4655,7 +4664,7 @@
         }
       } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
         if (I.getNumOperands() == 2 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getOperand(1)->getType()->isFloatingPointTy() &&
             I.getType() == I.getOperand(1)->getType() &&
             I.onlyReadsMemory()) {
           SDValue Tmp = getValue(I.getOperand(1));
@@ -4665,7 +4674,7 @@
         }
       } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
         if (I.getNumOperands() == 2 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getOperand(1)->getType()->isFloatingPointTy() &&
             I.getType() == I.getOperand(1)->getType() &&
             I.onlyReadsMemory()) {
           SDValue Tmp = getValue(I.getOperand(1));
@@ -4699,8 +4708,7 @@
 /// Chain/Flag as the input and updates them for the output Chain/Flag.
 /// If the Flag pointer is NULL, no flag is used.
 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
-                                      unsigned Order, SDValue &Chain,
-                                      SDValue *Flag) const {
+                                      SDValue &Chain, SDValue *Flag) const {
   // Assemble the legal parts into the final values.
   SmallVector<SDValue, 4> Values(ValueVTs.size());
   SmallVector<SDValue, 8> Parts;
@@ -4765,7 +4773,7 @@
       Parts[i] = P;
     }
 
-    Values[Value] = getCopyFromParts(DAG, dl, Order, Parts.begin(),
+    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
                                      NumRegs, RegisterVT, ValueVT);
     Part += NumRegs;
     Parts.clear();
@@ -4781,8 +4789,7 @@
 /// Chain/Flag as the input and updates them for the output Chain/Flag.
 /// If the Flag pointer is NULL, no flag is used.
 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                                 unsigned Order, SDValue &Chain,
-                                 SDValue *Flag) const {
+                                 SDValue &Chain, SDValue *Flag) const {
   // Get the list of the values's legal parts.
   unsigned NumRegs = Regs.size();
   SmallVector<SDValue, 8> Parts(NumRegs);
@@ -4791,7 +4798,7 @@
     unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
     EVT RegisterVT = RegVTs[Value];
 
-    getCopyToParts(DAG, dl, Order,
+    getCopyToParts(DAG, dl,
                    Val.getValue(Val.getResNo() + Value),
                    &Parts[Part], NumParts, RegisterVT);
     Part += NumParts;
@@ -4832,7 +4839,7 @@
 /// values added into it.
 void RegsForValue::AddInlineAsmOperands(unsigned Code,
                                         bool HasMatching,unsigned MatchingIdx,
-                                        SelectionDAG &DAG, unsigned Order,
+                                        SelectionDAG &DAG,
                                         std::vector<SDValue> &Ops) const {
   assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
   unsigned Flag = Code | (Regs.size() << 3);
@@ -5330,7 +5337,8 @@
         int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
         SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
         Chain = DAG.getStore(Chain, getCurDebugLoc(),
-                             OpInfo.CallOperand, StackSlot, NULL, 0);
+                             OpInfo.CallOperand, StackSlot, NULL, 0,
+                             false, false, 0);
         OpInfo.CallOperand = StackSlot;
       }
 
@@ -5421,7 +5429,7 @@
                                                2 /* REGDEF */ ,
                                                false,
                                                0,
-                                               DAG, SDNodeOrder,
+                                               DAG,
                                                AsmNodeOperands);
       break;
     }
@@ -5469,10 +5477,10 @@
 
           // Use the produced MatchedRegs object to
           MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
-                                    SDNodeOrder, Chain, &Flag);
+                                    Chain, &Flag);
           MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
                                            true, OpInfo.getMatchedOperand(),
-                                           DAG, SDNodeOrder, AsmNodeOperands);
+                                           DAG, AsmNodeOperands);
           break;
         } else {
           assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
@@ -5533,11 +5541,10 @@
       }
 
       OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
-                                        SDNodeOrder, Chain, &Flag);
+                                        Chain, &Flag);
 
       OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
-                                               DAG, SDNodeOrder,
-                                               AsmNodeOperands);
+                                               DAG, AsmNodeOperands);
       break;
     }
     case InlineAsm::isClobber: {
@@ -5545,7 +5552,7 @@
       // allocator is aware that the physreg got clobbered.
       if (!OpInfo.AssignedRegs.Regs.empty())
         OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
-                                                 false, 0, DAG, SDNodeOrder,
+                                                 false, 0, DAG,
                                                  AsmNodeOperands);
       break;
     }
@@ -5565,7 +5572,7 @@
   // and set it as the value of the call.
   if (!RetValRegs.Regs.empty()) {
     SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
-                                             SDNodeOrder, Chain, &Flag);
+                                             Chain, &Flag);
 
     // FIXME: Why don't we do this for inline asms with MRVs?
     if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
@@ -5605,7 +5612,7 @@
     RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
     Value *Ptr = IndirectStoresToEmit[i].second;
     SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
-                                             SDNodeOrder, Chain, &Flag);
+                                             Chain, &Flag);
     StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
 
   }
@@ -5616,7 +5623,8 @@
     SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
                                StoresToEmit[i].first,
                                getValue(StoresToEmit[i].second),
-                               StoresToEmit[i].second, 0);
+                               StoresToEmit[i].second, 0,
+                               false, false, 0);
     OutChains.push_back(Val);
   }
 
@@ -5669,8 +5677,7 @@
                             CallingConv::ID CallConv, bool isTailCall,
                             bool isReturnValueUsed,
                             SDValue Callee,
-                            ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl,
-                            unsigned Order) {
+                            ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
   // Handle all of the outgoing arguments.
   SmallVector<ISD::OutputArg, 32> Outs;
   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
@@ -5721,7 +5728,7 @@
       else if (Args[i].isZExt)
         ExtendKind = ISD::ZERO_EXTEND;
 
-      getCopyToParts(DAG, dl, Order, Op, &Parts[0], NumParts,
+      getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
                      PartVT, ExtendKind);
 
       for (unsigned j = 0; j != NumParts; ++j) {
@@ -5800,7 +5807,7 @@
     EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
     unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
 
-    ReturnValues.push_back(getCopyFromParts(DAG, dl, Order, &InVals[CurReg],
+    ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
                                             NumRegs, RegisterVT, VT,
                                             AssertOp));
     CurReg += NumRegs;
@@ -5840,7 +5847,7 @@
 
   RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
   SDValue Chain = DAG.getEntryNode();
-  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), SDNodeOrder, Chain, 0);
+  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
   PendingExports.push_back(Chain);
 }
 
@@ -5966,7 +5973,7 @@
     EVT VT = ValueVTs[0];
     EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
     ISD::NodeType AssertOp = ISD::DELETED_NODE;
-    SDValue ArgValue = getCopyFromParts(DAG, dl, 0, &InVals[0], 1,
+    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
                                         RegVT, VT, AssertOp);
 
     MachineFunction& MF = SDB->DAG.getMachineFunction();
@@ -6000,7 +6007,7 @@
         else if (F.paramHasAttr(Idx, Attribute::ZExt))
           AssertOp = ISD::AssertZext;
 
-        ArgValues.push_back(getCopyFromParts(DAG, dl, 0, &InVals[i],
+        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
                                              NumParts, PartVT, VT,
                                              AssertOp));
       }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index da2e6e4..05f9f1f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -56,9 +56,12 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
 #include <algorithm>
 using namespace llvm;
 
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+
 static cl::opt<bool>
 EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
           cl::desc("Enable verbose messages in the \"fast\" "
@@ -723,9 +726,9 @@
   // code to the MachineBasicBlock.
   if (TimePassesIsEnabled) {
     NamedRegionTimer T("Instruction Selection", GroupName);
-    InstructionSelect();
+    DoInstructionSelection();
   } else {
-    InstructionSelect();
+    DoInstructionSelection();
   }
 
   DEBUG(dbgs() << "Selected selection DAG:\n");
@@ -765,6 +768,66 @@
   DEBUG(BB->dump());
 }
 
+void SelectionDAGISel::DoInstructionSelection() {
+  DEBUG(errs() << "===== Instruction selection begins:\n");
+
+  PreprocessISelDAG();
+  
+  // Select target instructions for the DAG.
+  {
+    // Number all nodes with a topological order and set DAGSize.
+    DAGSize = CurDAG->AssignTopologicalOrder();
+    
+    // Create a dummy node (which is not added to allnodes), that adds
+    // a reference to the root node, preventing it from being deleted,
+    // and tracking any changes of the root.
+    HandleSDNode Dummy(CurDAG->getRoot());
+    ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
+    ++ISelPosition;
+    
+    // The AllNodes list is now topological-sorted. Visit the
+    // nodes by starting at the end of the list (the root of the
+    // graph) and preceding back toward the beginning (the entry
+    // node).
+    while (ISelPosition != CurDAG->allnodes_begin()) {
+      SDNode *Node = --ISelPosition;
+      // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+      // but there are currently some corner cases that it misses. Also, this
+      // makes it theoretically possible to disable the DAGCombiner.
+      if (Node->use_empty())
+        continue;
+      
+      SDNode *ResNode = Select(Node);
+      
+      // FIXME: This is pretty gross.  'Select' should be changed to not return
+      // anything at all and this code should be nuked with a tactical strike.
+      
+      // If node should not be replaced, continue with the next one.
+      if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+        continue;
+      // Replace node.
+      if (ResNode)
+        ReplaceUses(Node, ResNode);
+      
+      // If after the replacement this node is not used any more,
+      // remove this dead node.
+      if (Node->use_empty()) { // Don't delete EntryToken, etc.
+        ISelUpdater ISU(ISelPosition);
+        CurDAG->RemoveDeadNode(Node, &ISU);
+      }
+    }
+    
+    CurDAG->setRoot(Dummy.getValue());
+  }    
+  DEBUG(errs() << "===== Instruction selection ends:\n");
+
+  PostprocessISelDAG();
+  
+  // FIXME: This shouldn't be needed, remove it.
+  CurDAG->RemoveDeadNodes();
+}
+
+
 void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
                                             MachineFunction &MF,
                                             MachineModuleInfo *MMI,
@@ -870,6 +933,7 @@
         // feed PHI nodes in successor blocks.
         if (isa<TerminatorInst>(BI))
           if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
+            ++NumFastIselFailures;
             ResetDebugLoc(SDB, FastIS);
             if (EnableFastISelVerbose || EnableFastISelAbort) {
               dbgs() << "FastISel miss: ";
@@ -894,6 +958,7 @@
 
         // Then handle certain instructions as single-LLVM-Instruction blocks.
         if (isa<CallInst>(BI)) {
+          ++NumFastIselFailures;
           if (EnableFastISelVerbose || EnableFastISelAbort) {
             dbgs() << "FastISel missed call: ";
             BI->dump();
@@ -923,6 +988,7 @@
         // Otherwise, give up on FastISel for the rest of the block.
         // For now, be a little lenient about non-branch terminators.
         if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
+          ++NumFastIselFailures;
           if (EnableFastISelVerbose || EnableFastISelAbort) {
             dbgs() << "FastISel miss: ";
             BI->dump();
@@ -972,6 +1038,8 @@
       MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
       assert(PHI->isPHI() &&
              "This is not a machine PHI node that we are updating!");
+      if (!BB->isSuccessor(PHI->getParent()))
+        continue;
       PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
                                                 false));
       PHI->addOperand(MachineOperand::CreateMBB(BB));
@@ -1316,13 +1384,29 @@
 /// This function recursively traverses up the operand chain, ignoring
 /// certain nodes.
 static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
-                          SDNode *Root,
-                          SmallPtrSet<SDNode*, 16> &Visited) {
-  if (Use->getNodeId() < Def->getNodeId() ||
-      !Visited.insert(Use))
+                          SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+                          bool IgnoreChains) {
+  // The NodeID's are given uniques ID's where a node ID is guaranteed to be
+  // greater than all of its (recursive) operands.  If we scan to a point where
+  // 'use' is smaller than the node we're scanning for, then we know we will
+  // never find it.
+  //
+  // The Use may be -1 (unassigned) if it is a newly allocated node.  This can
+  // happen because we scan down to newly selected nodes in the case of flag
+  // uses.
+  if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
+    return false;
+  
+  // Don't revisit nodes if we already scanned it and didn't fail, we know we
+  // won't fail if we scan it again.
+  if (!Visited.insert(Use))
     return false;
 
   for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+    // Ignore chain uses, they are validated by HandleMergeInputChains.
+    if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+      continue;
+    
     SDNode *N = Use->getOperand(i).getNode();
     if (N == Def) {
       if (Use == ImmedUse || Use == Root)
@@ -1332,32 +1416,24 @@
     }
 
     // Traverse up the operand chain.
-    if (findNonImmUse(N, Def, ImmedUse, Root, Visited))
+    if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
       return true;
   }
   return false;
 }
 
-/// isNonImmUse - Start searching from Root up the DAG to check is Def can
-/// be reached. Return true if that's the case. However, ignore direct uses
-/// by ImmedUse (which would be U in the example illustrated in
-/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
-/// case).
-/// FIXME: to be really generic, we should allow direct use by any node
-/// that is being folded. But realisticly since we only fold loads which
-/// have one non-chain use, we only need to watch out for load/op/store
-/// and load/op/cmp case where the root (store / cmp) may reach the load via
-/// its chain operand.
-static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
-  SmallPtrSet<SDNode*, 16> Visited;
-  return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+                                          SDNode *Root) const {
+  if (OptLevel == CodeGenOpt::None) return false;
+  return N.hasOneUse();
 }
 
-/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of
-/// U can be folded during instruction selection that starts at Root and
-/// folding N is profitable.
-bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
-                                                  SDNode *Root) const {
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+                                     bool IgnoreChains) const {
   if (OptLevel == CodeGenOpt::None) return false;
 
   // If Root use can somehow reach N through a path that that doesn't contain
@@ -1402,6 +1478,8 @@
   // Fold. But since Fold and FU are flagged together, this will create
   // a cycle in the scheduling graph.
 
+  // If the node has flags, walk down the graph to the "lowest" node in the
+  // flagged set.
   EVT VT = Root->getValueType(Root->getNumValues()-1);
   while (VT == MVT::Flag) {
     SDNode *FU = findFlagUse(Root);
@@ -1409,9 +1487,17 @@
       break;
     Root = FU;
     VT = Root->getValueType(Root->getNumValues()-1);
+    
+    // If our query node has a flag result with a use, we've walked up it.  If
+    // the user (which has already been selected) has a chain or indirectly uses
+    // the chain, our WalkChainUsers predicate will not consider it.  Because of
+    // this, we cannot ignore chains in this predicate.
+    IgnoreChains = false;
   }
+  
 
-  return !isNonImmUse(Root, N, U);
+  SmallPtrSet<SDNode*, 16> Visited;
+  return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
 }
 
 SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -1423,6 +1509,7 @@
   VTs.push_back(MVT::Flag);
   SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
                                 VTs, &Ops[0], Ops.size());
+  New->setNodeId(-1);
   return New.getNode();
 }
 
@@ -1438,25 +1525,1219 @@
                               MVT::Other, Tmp, Chain);
 }
 
+/// GetVBR - decode a vbr encoding whose top bit is set.
+ALWAYS_INLINE static uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+  assert(Val >= 128 && "Not a VBR");
+  Val &= 127;  // Remove first vbr bit.
+  
+  unsigned Shift = 7;
+  uint64_t NextBits;
+  do {
+    NextBits = MatcherTable[Idx++];
+    Val |= (NextBits&127) << Shift;
+    Shift += 7;
+  } while (NextBits & 128);
+  
+  return Val;
+}
+
+
+/// UpdateChainsAndFlags - When a match is complete, this method updates uses of
+/// interior flag and chain results to use the new flag and chain results.
+void SelectionDAGISel::
+UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
+                     const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                     SDValue InputFlag,
+                     const SmallVectorImpl<SDNode*> &FlagResultNodesMatched,
+                     bool isMorphNodeTo) {
+  SmallVector<SDNode*, 4> NowDeadNodes;
+  
+  ISelUpdater ISU(ISelPosition);
+
+  // Now that all the normal results are replaced, we replace the chain and
+  // flag results if present.
+  if (!ChainNodesMatched.empty()) {
+    assert(InputChain.getNode() != 0 &&
+           "Matched input chains but didn't produce a chain");
+    // Loop over all of the nodes we matched that produced a chain result.
+    // Replace all the chain results with the final chain we ended up with.
+    for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+      SDNode *ChainNode = ChainNodesMatched[i];
+      
+      // If this node was already deleted, don't look at it.
+      if (ChainNode->getOpcode() == ISD::DELETED_NODE)
+        continue;
+      
+      // Don't replace the results of the root node if we're doing a
+      // MorphNodeTo.
+      if (ChainNode == NodeToMatch && isMorphNodeTo)
+        continue;
+      
+      SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+      if (ChainVal.getValueType() == MVT::Flag)
+        ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+      assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+      CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
+      
+      // If the node became dead, delete it.
+      if (ChainNode->use_empty())
+        NowDeadNodes.push_back(ChainNode);
+    }
+  }
+  
+  // If the result produces a flag, update any flag results in the matched
+  // pattern with the flag result.
+  if (InputFlag.getNode() != 0) {
+    // Handle any interior nodes explicitly marked.
+    for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) {
+      SDNode *FRN = FlagResultNodesMatched[i];
+      
+      // If this node was already deleted, don't look at it.
+      if (FRN->getOpcode() == ISD::DELETED_NODE)
+        continue;
+      
+      assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag &&
+             "Doesn't have a flag result");
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
+                                        InputFlag, &ISU);
+      
+      // If the node became dead, delete it.
+      if (FRN->use_empty())
+        NowDeadNodes.push_back(FRN);
+    }
+  }
+  
+  if (!NowDeadNodes.empty())
+    CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
+  
+  DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+  CR_Simple,
+  CR_InducesCycle,
+  CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevent folding because
+/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
+/// between pattern nodes (in which case the TF becomes part of the pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down to
+/// already selected nodes "below" us.
+static ChainResult 
+WalkChainUsers(SDNode *ChainedNode,
+               SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+               SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+  ChainResult Result = CR_Simple;
+  
+  for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+         E = ChainedNode->use_end(); UI != E; ++UI) {
+    // Make sure the use is of the chain, not some other value we produce.
+    if (UI.getUse().getValueType() != MVT::Other) continue;
+    
+    SDNode *User = *UI;
+
+    // If we see an already-selected machine node, then we've gone beyond the
+    // pattern that we're selecting down into the already selected chunk of the
+    // DAG.
+    if (User->isMachineOpcode() ||
+        User->getOpcode() == ISD::HANDLENODE)  // Root of the graph.
+      continue;
+    
+    if (User->getOpcode() == ISD::CopyToReg ||
+        User->getOpcode() == ISD::CopyFromReg ||
+        User->getOpcode() == ISD::INLINEASM) {
+      // If their node ID got reset to -1 then they've already been selected.
+      // Treat them like a MachineOpcode.
+      if (User->getNodeId() == -1)
+        continue;
+    }
+
+    // If we have a TokenFactor, we handle it specially.
+    if (User->getOpcode() != ISD::TokenFactor) {
+      // If the node isn't a token factor and isn't part of our pattern, then it
+      // must be a random chained node in between two nodes we're selecting.
+      // This happens when we have something like:
+      //   x = load ptr
+      //   call
+      //   y = x+4
+      //   store y -> ptr
+      // Because we structurally match the load/store as a read/modify/write,
+      // but the call is chained between them.  We cannot fold in this case
+      // because it would induce a cycle in the graph.
+      if (!std::count(ChainedNodesInPattern.begin(),
+                      ChainedNodesInPattern.end(), User))
+        return CR_InducesCycle;
+      
+      // Otherwise we found a node that is part of our pattern.  For example in:
+      //   x = load ptr
+      //   y = x+4
+      //   store y -> ptr
+      // This would happen when we're scanning down from the load and see the
+      // store as a user.  Record that there is a use of ChainedNode that is
+      // part of the pattern and keep scanning uses.
+      Result = CR_LeadsToInteriorNode;
+      InteriorChainedNodes.push_back(User);
+      continue;
+    }
+    
+    // If we found a TokenFactor, there are two cases to consider: first if the
+    // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+    // uses of the TF are in our pattern) we just want to ignore it.  Second,
+    // the TokenFactor can be sandwiched in between two chained nodes, like so:
+    //     [Load chain]
+    //         ^
+    //         |
+    //       [Load]
+    //       ^    ^
+    //       |    \                    DAG's like cheese
+    //      /       \                       do you?
+    //     /         |
+    // [TokenFactor] [Op]
+    //     ^          ^
+    //     |          |
+    //      \        /
+    //       \      /
+    //       [Store]
+    //
+    // In this case, the TokenFactor becomes part of our match and we rewrite it
+    // as a new TokenFactor.
+    //
+    // To distinguish these two cases, do a recursive walk down the uses.
+    switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+    case CR_Simple:
+      // If the uses of the TokenFactor are just already-selected nodes, ignore
+      // it, it is "below" our pattern.
+      continue;
+    case CR_InducesCycle:
+      // If the uses of the TokenFactor lead to nodes that are not part of our
+      // pattern that are not selected, folding would turn this into a cycle,
+      // bail out now.
+      return CR_InducesCycle;
+    case CR_LeadsToInteriorNode:
+      break;  // Otherwise, keep processing.
+    }
+    
+    // Okay, we know we're in the interesting interior case.  The TokenFactor
+    // is now going to be considered part of the pattern so that we rewrite its
+    // uses (it may have uses that are not part of the pattern) with the
+    // ultimate chain result of the generated code.  We will also add its chain
+    // inputs as inputs to the ultimate TokenFactor we create.
+    Result = CR_LeadsToInteriorNode;
+    ChainedNodesInPattern.push_back(User);
+    InteriorChainedNodes.push_back(User);
+    continue;
+  }
+  
+  return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chains.  The
+/// input vector contains a list of all of the chained nodes that we match.  We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and if so, creating a TokenFactor node. that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                       SelectionDAG *CurDAG) {
+  // Walk all of the chained nodes we've matched, recursively scanning down the
+  // users of the chain result. This adds any TokenFactor nodes that are caught
+  // in between chained nodes to the chained and interior nodes list.
+  SmallVector<SDNode*, 3> InteriorChainedNodes;
+  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+    if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+                       InteriorChainedNodes) == CR_InducesCycle)
+      return SDValue(); // Would induce a cycle.
+  }
+  
+  // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+  // that we are interested in.  Form our input TokenFactor node.
+  SmallVector<SDValue, 3> InputChains;
+  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+    // Add the input chain of this node to the InputChains list (which will be
+    // the operands of the generated TokenFactor) if it's not an interior node.
+    SDNode *N = ChainNodesMatched[i];
+    if (N->getOpcode() != ISD::TokenFactor) {
+      if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+        continue;
+      
+      // Otherwise, add the input chain.
+      SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+      assert(InChain.getValueType() == MVT::Other && "Not a chain");
+      InputChains.push_back(InChain);
+      continue;
+    }
+    
+    // If we have a token factor, we want to add all inputs of the token factor
+    // that are not part of the pattern we're matching.
+    for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+      if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+                      N->getOperand(op).getNode()))
+        InputChains.push_back(N->getOperand(op));
+    }
+  }
+  
+  SDValue Res;
+  if (InputChains.size() == 1)
+    return InputChains[0];
+  return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+                         MVT::Other, &InputChains[0], InputChains.size());
+}  
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+          const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+  // It is possible we're using MorphNodeTo to replace a node with no
+  // normal results with one that has a normal result (or we could be
+  // adding a chain) and the input could have flags and chains as well.
+  // In this case we need to shifting the operands down.
+  // FIXME: This is a horrible hack and broken in obscure cases, no worse
+  // than the old isel though.  We should sink this into MorphNodeTo.
+  int OldFlagResultNo = -1, OldChainResultNo = -1;
+
+  unsigned NTMNumResults = Node->getNumValues();
+  if (Node->getValueType(NTMNumResults-1) == MVT::Flag) {
+    OldFlagResultNo = NTMNumResults-1;
+    if (NTMNumResults != 1 &&
+        Node->getValueType(NTMNumResults-2) == MVT::Other)
+      OldChainResultNo = NTMNumResults-2;
+  } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+    OldChainResultNo = NTMNumResults-1;
+
+  // Call the underlying SelectionDAG routine to do the transmogrification. Note
+  // that this deletes operands of the old node that become dead.
+  SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+  // MorphNodeTo can operate in two ways: if an existing node with the
+  // specified operands exists, it can just return it.  Otherwise, it
+  // updates the node in place to have the requested operands.
+  if (Res == Node) {
+    // If we updated the node in place, reset the node ID.  To the isel,
+    // this should be just like a newly allocated machine node.
+    Res->setNodeId(-1);
+  }
+
+  unsigned ResNumResults = Res->getNumValues();
+  // Move the flag if needed.
+  if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 &&
+      (unsigned)OldFlagResultNo != ResNumResults-1)
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo), 
+                                      SDValue(Res, ResNumResults-1));
+
+  if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
+  --ResNumResults;
+
+  // Move the chain reference if needed.
+  if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+      (unsigned)OldChainResultNo != ResNumResults-1)
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), 
+                                      SDValue(Res, ResNumResults-1));
+
+  // Otherwise, no replacement happened because the node already exists. Replace
+  // Uses of the old node with the new one.
+  if (Res != Node)
+    CurDAG->ReplaceAllUsesWith(Node, Res);
+  
+  return Res;
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+          SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) {
+  // Accept if it is exactly the same as a previously recorded node.
+  unsigned RecNo = MatcherTable[MatcherIndex++];
+  assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+  return N == RecordedNodes[RecNo];
+}
+  
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+ALWAYS_INLINE static bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+                      SelectionDAGISel &SDISel) {
+  return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+ALWAYS_INLINE static bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+                   SelectionDAGISel &SDISel, SDNode *N) {
+  return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+ALWAYS_INLINE static bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+            SDNode *N) {
+  return N->getOpcode() == MatcherTable[MatcherIndex++];
+}
+
+ALWAYS_INLINE static bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+          SDValue N, const TargetLowering &TLI) {
+  MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+  if (N.getValueType() == VT) return true;
+  
+  // Handle the case when VT is iPTR.
+  return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
+}
+
+ALWAYS_INLINE static bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+               SDValue N, const TargetLowering &TLI,
+               unsigned ChildNo) {
+  if (ChildNo >= N.getNumOperands())
+    return false;  // Match fails if out of range child #.
+  return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
+}
+
+
+ALWAYS_INLINE static bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+              SDValue N) {
+  return cast<CondCodeSDNode>(N)->get() ==
+      (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+ALWAYS_INLINE static bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+               SDValue N, const TargetLowering &TLI) {
+  MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+  if (cast<VTSDNode>(N)->getVT() == VT)
+    return true;
+  
+  // Handle the case when VT is iPTR.
+  return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+             SDValue N) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+  
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+  return C != 0 && C->getSExtValue() == Val;
+}
+
+ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+            SDValue N, SelectionDAGISel &SDISel) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+  
+  if (N->getOpcode() != ISD::AND) return false;
+  
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+           SDValue N, SelectionDAGISel &SDISel) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+  
+  if (N->getOpcode() != ISD::OR) return false;
+  
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node.  If the current predicate is known to
+/// fail, set Result=true and return anything.  If the current predicate is
+/// known to pass, set Result=false and return the MatcherIndex to continue
+/// with.  If the current predicate is unknown, set Result=false and return the
+/// MatcherIndex to continue with. 
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+                                       unsigned Index, SDValue N,
+                                       bool &Result, SelectionDAGISel &SDISel,
+                                       SmallVectorImpl<SDValue> &RecordedNodes){
+  switch (Table[Index++]) {
+  default:
+    Result = false;
+    return Index-1;  // Could not evaluate this predicate.
+  case SelectionDAGISel::OPC_CheckSame:
+    Result = !::CheckSame(Table, Index, N, RecordedNodes);
+    return Index;
+  case SelectionDAGISel::OPC_CheckPatternPredicate:
+    Result = !::CheckPatternPredicate(Table, Index, SDISel);
+    return Index;
+  case SelectionDAGISel::OPC_CheckPredicate:
+    Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+    return Index;
+  case SelectionDAGISel::OPC_CheckOpcode:
+    Result = !::CheckOpcode(Table, Index, N.getNode());
+    return Index;
+  case SelectionDAGISel::OPC_CheckType:
+    Result = !::CheckType(Table, Index, N, SDISel.TLI);
+    return Index;
+  case SelectionDAGISel::OPC_CheckChild0Type:
+  case SelectionDAGISel::OPC_CheckChild1Type:
+  case SelectionDAGISel::OPC_CheckChild2Type:
+  case SelectionDAGISel::OPC_CheckChild3Type:
+  case SelectionDAGISel::OPC_CheckChild4Type:
+  case SelectionDAGISel::OPC_CheckChild5Type:
+  case SelectionDAGISel::OPC_CheckChild6Type:
+  case SelectionDAGISel::OPC_CheckChild7Type:
+    Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+                        Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+    return Index;
+  case SelectionDAGISel::OPC_CheckCondCode:
+    Result = !::CheckCondCode(Table, Index, N);
+    return Index;
+  case SelectionDAGISel::OPC_CheckValueType:
+    Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+    return Index;
+  case SelectionDAGISel::OPC_CheckInteger:
+    Result = !::CheckInteger(Table, Index, N);
+    return Index;
+  case SelectionDAGISel::OPC_CheckAndImm:
+    Result = !::CheckAndImm(Table, Index, N, SDISel);
+    return Index;
+  case SelectionDAGISel::OPC_CheckOrImm:
+    Result = !::CheckOrImm(Table, Index, N, SDISel);
+    return Index;
+  }
+}
+
+
+struct MatchScope {
+  /// FailIndex - If this match fails, this is the index to continue with.
+  unsigned FailIndex;
+  
+  /// NodeStack - The node stack when the scope was formed.
+  SmallVector<SDValue, 4> NodeStack;
+  
+  /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+  unsigned NumRecordedNodes;
+  
+  /// NumMatchedMemRefs - The number of matched memref entries.
+  unsigned NumMatchedMemRefs;
+  
+  /// InputChain/InputFlag - The current chain/flag 
+  SDValue InputChain, InputFlag;
+
+  /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+  bool HasChainNodesMatched, HasFlagResultNodesMatched;
+};
+
+SDNode *SelectionDAGISel::
+SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
+                 unsigned TableSize) {
+  // FIXME: Should these even be selected?  Handle these cases in the caller?
+  switch (NodeToMatch->getOpcode()) {
+  default:
+    break;
+  case ISD::EntryToken:       // These nodes remain the same.
+  case ISD::BasicBlock:
+  case ISD::Register:
+  case ISD::HANDLENODE:
+  case ISD::TargetConstant:
+  case ISD::TargetConstantFP:
+  case ISD::TargetConstantPool:
+  case ISD::TargetFrameIndex:
+  case ISD::TargetExternalSymbol:
+  case ISD::TargetBlockAddress:
+  case ISD::TargetJumpTable:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::TargetGlobalAddress:
+  case ISD::TokenFactor:
+  case ISD::CopyFromReg:
+  case ISD::CopyToReg:
+    NodeToMatch->setNodeId(-1); // Mark selected.
+    return 0;
+  case ISD::AssertSext:
+  case ISD::AssertZext:
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+                                      NodeToMatch->getOperand(0));
+    return 0;
+  case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
+  case ISD::EH_LABEL:  return Select_EH_LABEL(NodeToMatch);
+  case ISD::UNDEF:     return Select_UNDEF(NodeToMatch);
+  }
+  
+  assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+  // Set up the node stack with NodeToMatch as the only node on the stack.
+  SmallVector<SDValue, 8> NodeStack;
+  SDValue N = SDValue(NodeToMatch, 0);
+  NodeStack.push_back(N);
+
+  // MatchScopes - Scopes used when matching, if a match failure happens, this
+  // indicates where to continue checking.
+  SmallVector<MatchScope, 8> MatchScopes;
+  
+  // RecordedNodes - This is the set of nodes that have been recorded by the
+  // state machine.
+  SmallVector<SDValue, 8> RecordedNodes;
+  
+  // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+  // pattern.
+  SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+  
+  // These are the current input chain and flag for use when generating nodes.
+  // Various Emit operations change these.  For example, emitting a copytoreg
+  // uses and updates these.
+  SDValue InputChain, InputFlag;
+  
+  // ChainNodesMatched - If a pattern matches nodes that have input/output
+  // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+  // which ones they are.  The result is captured into this list so that we can
+  // update the chain results when the pattern is complete.
+  SmallVector<SDNode*, 3> ChainNodesMatched;
+  SmallVector<SDNode*, 3> FlagResultNodesMatched;
+  
+  DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+        NodeToMatch->dump(CurDAG);
+        errs() << '\n');
+  
+  // Determine where to start the interpreter.  Normally we start at opcode #0,
+  // but if the state machine starts with an OPC_SwitchOpcode, then we
+  // accelerate the first lookup (which is guaranteed to be hot) with the
+  // OpcodeOffset table.
+  unsigned MatcherIndex = 0;
+  
+  if (!OpcodeOffset.empty()) {
+    // Already computed the OpcodeOffset table, just index into it.
+    if (N.getOpcode() < OpcodeOffset.size())
+      MatcherIndex = OpcodeOffset[N.getOpcode()];
+    DEBUG(errs() << "  Initial Opcode index to " << MatcherIndex << "\n");
+
+  } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+    // Otherwise, the table isn't computed, but the state machine does start
+    // with an OPC_SwitchOpcode instruction.  Populate the table now, since this
+    // is the first time we're selecting an instruction.
+    unsigned Idx = 1;
+    while (1) {
+      // Get the size of this case.
+      unsigned CaseSize = MatcherTable[Idx++];
+      if (CaseSize & 128)
+        CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+      if (CaseSize == 0) break;
+
+      // Get the opcode, add the index to the table.
+      unsigned Opc = MatcherTable[Idx++];
+      if (Opc >= OpcodeOffset.size())
+        OpcodeOffset.resize((Opc+1)*2);
+      OpcodeOffset[Opc] = Idx;
+      Idx += CaseSize;
+    }
+
+    // Okay, do the lookup for the first opcode.
+    if (N.getOpcode() < OpcodeOffset.size())
+      MatcherIndex = OpcodeOffset[N.getOpcode()];
+  }
+  
+  while (1) {
+    assert(MatcherIndex < TableSize && "Invalid index");
+    BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+    switch (Opcode) {
+    case OPC_Scope: {
+      // Okay, the semantics of this operation are that we should push a scope
+      // then evaluate the first child.  However, pushing a scope only to have
+      // the first check fail (which then pops it) is inefficient.  If we can
+      // determine immediately that the first check (or first several) will
+      // immediately fail, don't even bother pushing a scope for them.
+      unsigned FailIndex;
+      
+      while (1) {
+        unsigned NumToSkip = MatcherTable[MatcherIndex++];
+        if (NumToSkip & 128)
+          NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+        // Found the end of the scope with no match.
+        if (NumToSkip == 0) {
+          FailIndex = 0;
+          break;
+        }
+        
+        FailIndex = MatcherIndex+NumToSkip;
+        
+        // If we can't evaluate this predicate without pushing a scope (e.g. if
+        // it is a 'MoveParent') or if the predicate succeeds on this node, we
+        // push the scope and evaluate the full predicate chain.
+        bool Result;
+        MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+                                              Result, *this, RecordedNodes);
+        if (!Result)
+          break;
+        
+        DEBUG(errs() << "  Skipped scope entry at index " << MatcherIndex
+              << " continuing at " << FailIndex << "\n");
+
+        
+        // Otherwise, we know that this case of the Scope is guaranteed to fail,
+        // move to the next case.
+        MatcherIndex = FailIndex;
+      }
+      
+      // If the whole scope failed to match, bail.
+      if (FailIndex == 0) break;
+      
+      // Push a MatchScope which indicates where to go if the first child fails
+      // to match.
+      MatchScope NewEntry;
+      NewEntry.FailIndex = FailIndex;
+      NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+      NewEntry.NumRecordedNodes = RecordedNodes.size();
+      NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+      NewEntry.InputChain = InputChain;
+      NewEntry.InputFlag = InputFlag;
+      NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+      NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty();
+      MatchScopes.push_back(NewEntry);
+      continue;
+    }
+    case OPC_RecordNode:
+      // Remember this node, it may end up being an operand in the pattern.
+      RecordedNodes.push_back(N);
+      continue;
+        
+    case OPC_RecordChild0: case OPC_RecordChild1:
+    case OPC_RecordChild2: case OPC_RecordChild3:
+    case OPC_RecordChild4: case OPC_RecordChild5:
+    case OPC_RecordChild6: case OPC_RecordChild7: {
+      unsigned ChildNo = Opcode-OPC_RecordChild0;
+      if (ChildNo >= N.getNumOperands())
+        break;  // Match fails if out of range child #.
+
+      RecordedNodes.push_back(N->getOperand(ChildNo));
+      continue;
+    }
+    case OPC_RecordMemRef:
+      MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+      continue;
+        
+    case OPC_CaptureFlagInput:
+      // If the current node has an input flag, capture it in InputFlag.
+      if (N->getNumOperands() != 0 &&
+          N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag)
+        InputFlag = N->getOperand(N->getNumOperands()-1);
+      continue;
+        
+    case OPC_MoveChild: {
+      unsigned ChildNo = MatcherTable[MatcherIndex++];
+      if (ChildNo >= N.getNumOperands())
+        break;  // Match fails if out of range child #.
+      N = N.getOperand(ChildNo);
+      NodeStack.push_back(N);
+      continue;
+    }
+        
+    case OPC_MoveParent:
+      // Pop the current node off the NodeStack.
+      NodeStack.pop_back();
+      assert(!NodeStack.empty() && "Node stack imbalance!");
+      N = NodeStack.back();  
+      continue;
+     
+    case OPC_CheckSame:
+      if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+      continue;
+    case OPC_CheckPatternPredicate:
+      if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+      continue;
+    case OPC_CheckPredicate:
+      if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+                                N.getNode()))
+        break;
+      continue;
+    case OPC_CheckComplexPat: {
+      unsigned CPNum = MatcherTable[MatcherIndex++];
+      unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+      if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum,
+                               RecordedNodes))
+        break;
+      continue;
+    }
+    case OPC_CheckOpcode:
+      if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+      continue;
+        
+    case OPC_CheckType:
+      if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
+      continue;
+        
+    case OPC_SwitchOpcode: {
+      unsigned CurNodeOpcode = N.getOpcode();
+      unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+      unsigned CaseSize;
+      while (1) {
+        // Get the size of this case.
+        CaseSize = MatcherTable[MatcherIndex++];
+        if (CaseSize & 128)
+          CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+        if (CaseSize == 0) break;
+
+        // If the opcode matches, then we will execute this case.
+        if (CurNodeOpcode == MatcherTable[MatcherIndex++])
+          break;
+      
+        // Otherwise, skip over this case.
+        MatcherIndex += CaseSize;
+      }
+      
+      // If no cases matched, bail out.
+      if (CaseSize == 0) break;
+      
+      // Otherwise, execute the case we found.
+      DEBUG(errs() << "  OpcodeSwitch from " << SwitchStart
+                   << " to " << MatcherIndex << "\n");
+      continue;
+    }
+        
+    case OPC_SwitchType: {
+      MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy;
+      unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+      unsigned CaseSize;
+      while (1) {
+        // Get the size of this case.
+        CaseSize = MatcherTable[MatcherIndex++];
+        if (CaseSize & 128)
+          CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+        if (CaseSize == 0) break;
+        
+        MVT::SimpleValueType CaseVT =
+          (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+        if (CaseVT == MVT::iPTR)
+          CaseVT = TLI.getPointerTy().SimpleTy;
+        
+        // If the VT matches, then we will execute this case.
+        if (CurNodeVT == CaseVT)
+          break;
+        
+        // Otherwise, skip over this case.
+        MatcherIndex += CaseSize;
+      }
+      
+      // If no cases matched, bail out.
+      if (CaseSize == 0) break;
+      
+      // Otherwise, execute the case we found.
+      DEBUG(errs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+                   << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+      continue;
+    }
+    case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+    case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+    case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+    case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+      if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+                            Opcode-OPC_CheckChild0Type))
+        break;
+      continue;
+    case OPC_CheckCondCode:
+      if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+      continue;
+    case OPC_CheckValueType:
+      if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break;
+      continue;
+    case OPC_CheckInteger:
+      if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+      continue;
+    case OPC_CheckAndImm:
+      if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+      continue;
+    case OPC_CheckOrImm:
+      if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+      continue;
+        
+    case OPC_CheckFoldableChainNode: {
+      assert(NodeStack.size() != 1 && "No parent node");
+      // Verify that all intermediate nodes between the root and this one have
+      // a single use.
+      bool HasMultipleUses = false;
+      for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+        if (!NodeStack[i].hasOneUse()) {
+          HasMultipleUses = true;
+          break;
+        }
+      if (HasMultipleUses) break;
+
+      // Check to see that the target thinks this is profitable to fold and that
+      // we can fold it without inducing cycles in the graph.
+      if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+                              NodeToMatch) ||
+          !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+                         NodeToMatch, true/*We validate our own chains*/))
+        break;
+      
+      continue;
+    }
+    case OPC_EmitInteger: {
+      MVT::SimpleValueType VT =
+        (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+      int64_t Val = MatcherTable[MatcherIndex++];
+      if (Val & 128)
+        Val = GetVBR(Val, MatcherTable, MatcherIndex);
+      RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT));
+      continue;
+    }
+    case OPC_EmitRegister: {
+      MVT::SimpleValueType VT =
+        (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+      unsigned RegNo = MatcherTable[MatcherIndex++];
+      RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT));
+      continue;
+    }
+        
+    case OPC_EmitConvertToTarget:  {
+      // Convert from IMM/FPIMM to target version.
+      unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+      SDValue Imm = RecordedNodes[RecNo];
+
+      if (Imm->getOpcode() == ISD::Constant) {
+        int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
+        Imm = CurDAG->getTargetConstant(Val, Imm.getValueType());
+      } else if (Imm->getOpcode() == ISD::ConstantFP) {
+        const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+        Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
+      }
+      
+      RecordedNodes.push_back(Imm);
+      continue;
+    }
+        
+    case OPC_EmitMergeInputChains: {
+      assert(InputChain.getNode() == 0 &&
+             "EmitMergeInputChains should be the first chain producing node");
+      // This node gets a list of nodes we matched in the input that have
+      // chains.  We want to token factor all of the input chains to these nodes
+      // together.  However, if any of the input chains is actually one of the
+      // nodes matched in this pattern, then we have an intra-match reference.
+      // Ignore these because the newly token factored chain should not refer to
+      // the old nodes.
+      unsigned NumChains = MatcherTable[MatcherIndex++];
+      assert(NumChains != 0 && "Can't TF zero chains");
+
+      assert(ChainNodesMatched.empty() &&
+             "Should only have one EmitMergeInputChains per match");
+
+      // Read all of the chained nodes.
+      for (unsigned i = 0; i != NumChains; ++i) {
+        unsigned RecNo = MatcherTable[MatcherIndex++];
+        assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+        ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+        
+        // FIXME: What if other value results of the node have uses not matched
+        // by this pattern?
+        if (ChainNodesMatched.back() != NodeToMatch &&
+            !RecordedNodes[RecNo].hasOneUse()) {
+          ChainNodesMatched.clear();
+          break;
+        }
+      }
+      
+      // If the inner loop broke out, the match fails.
+      if (ChainNodesMatched.empty())
+        break;
+
+      // Merge the input chains if they are not intra-pattern references.
+      InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+      
+      if (InputChain.getNode() == 0)
+        break;  // Failed to merge.
+
+      continue;
+    }
+        
+    case OPC_EmitCopyToReg: {
+      unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+      unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+      
+      if (InputChain.getNode() == 0)
+        InputChain = CurDAG->getEntryNode();
+      
+      InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
+                                        DestPhysReg, RecordedNodes[RecNo],
+                                        InputFlag);
+      
+      InputFlag = InputChain.getValue(1);
+      continue;
+    }
+        
+    case OPC_EmitNodeXForm: {
+      unsigned XFormNo = MatcherTable[MatcherIndex++];
+      unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+      RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo));
+      continue;
+    }
+        
+    case OPC_EmitNode:
+    case OPC_MorphNodeTo: {
+      uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+      TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+      unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+      // Get the result VT list.
+      unsigned NumVTs = MatcherTable[MatcherIndex++];
+      SmallVector<EVT, 4> VTs;
+      for (unsigned i = 0; i != NumVTs; ++i) {
+        MVT::SimpleValueType VT =
+          (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+        if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+        VTs.push_back(VT);
+      }
+      
+      if (EmitNodeInfo & OPFL_Chain)
+        VTs.push_back(MVT::Other);
+      if (EmitNodeInfo & OPFL_FlagOutput)
+        VTs.push_back(MVT::Flag);
+      
+      // This is hot code, so optimize the two most common cases of 1 and 2
+      // results.
+      SDVTList VTList;
+      if (VTs.size() == 1)
+        VTList = CurDAG->getVTList(VTs[0]);
+      else if (VTs.size() == 2)
+        VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+      else
+        VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+
+      // Get the operand list.
+      unsigned NumOps = MatcherTable[MatcherIndex++];
+      SmallVector<SDValue, 8> Ops;
+      for (unsigned i = 0; i != NumOps; ++i) {
+        unsigned RecNo = MatcherTable[MatcherIndex++];
+        if (RecNo & 128)
+          RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+        
+        assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+        Ops.push_back(RecordedNodes[RecNo]);
+      }
+      
+      // If there are variadic operands to add, handle them now.
+      if (EmitNodeInfo & OPFL_VariadicInfo) {
+        // Determine the start index to copy from.
+        unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+        FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+        assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+               "Invalid variadic node");
+        // Copy all of the variadic operands, not including a potential flag
+        // input.
+        for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+             i != e; ++i) {
+          SDValue V = NodeToMatch->getOperand(i);
+          if (V.getValueType() == MVT::Flag) break;
+          Ops.push_back(V);
+        }
+      }
+      
+      // If this has chain/flag inputs, add them.
+      if (EmitNodeInfo & OPFL_Chain)
+        Ops.push_back(InputChain);
+      if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0)
+        Ops.push_back(InputFlag);
+      
+      // Create the node.
+      SDNode *Res = 0;
+      if (Opcode != OPC_MorphNodeTo) {
+        // If this is a normal EmitNode command, just create the new node and
+        // add the results to the RecordedNodes list.
+        Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+                                     VTList, Ops.data(), Ops.size());
+        
+        // Add all the non-flag/non-chain results to the RecordedNodes list.
+        for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+          if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break;
+          RecordedNodes.push_back(SDValue(Res, i));
+        }
+        
+      } else {
+        Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+                        EmitNodeInfo);
+      }
+      
+      // If the node had chain/flag results, update our notion of the current
+      // chain and flag.
+      if (EmitNodeInfo & OPFL_FlagOutput) {
+        InputFlag = SDValue(Res, VTs.size()-1);
+        if (EmitNodeInfo & OPFL_Chain)
+          InputChain = SDValue(Res, VTs.size()-2);
+      } else if (EmitNodeInfo & OPFL_Chain)
+        InputChain = SDValue(Res, VTs.size()-1);
+
+      // If the OPFL_MemRefs flag is set on this node, slap all of the
+      // accumulated memrefs onto it.
+      //
+      // FIXME: This is vastly incorrect for patterns with multiple outputs
+      // instructions that access memory and for ComplexPatterns that match
+      // loads.
+      if (EmitNodeInfo & OPFL_MemRefs) {
+        MachineSDNode::mmo_iterator MemRefs =
+          MF->allocateMemRefsArray(MatchedMemRefs.size());
+        std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs);
+        cast<MachineSDNode>(Res)
+          ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
+      }
+      
+      DEBUG(errs() << "  "
+                   << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+                   << " node: "; Res->dump(CurDAG); errs() << "\n");
+      
+      // If this was a MorphNodeTo then we're completely done!
+      if (Opcode == OPC_MorphNodeTo) {
+        // Update chain and flag uses.
+        UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
+                             InputFlag, FlagResultNodesMatched, true);
+        return Res;
+      }
+      
+      continue;
+    }
+        
+    case OPC_MarkFlagResults: {
+      unsigned NumNodes = MatcherTable[MatcherIndex++];
+      
+      // Read and remember all the flag-result nodes.
+      for (unsigned i = 0; i != NumNodes; ++i) {
+        unsigned RecNo = MatcherTable[MatcherIndex++];
+        if (RecNo & 128)
+          RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+        assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+        FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+      }
+      continue;
+    }
+      
+    case OPC_CompleteMatch: {
+      // The match has been completed, and any new nodes (if any) have been
+      // created.  Patch up references to the matched dag to use the newly
+      // created nodes.
+      unsigned NumResults = MatcherTable[MatcherIndex++];
+
+      for (unsigned i = 0; i != NumResults; ++i) {
+        unsigned ResSlot = MatcherTable[MatcherIndex++];
+        if (ResSlot & 128)
+          ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+        
+        assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
+        SDValue Res = RecordedNodes[ResSlot];
+        
+        // FIXME2: Eliminate this horrible hack by fixing the 'Gen' program
+        // after (parallel) on input patterns are removed.  This would also
+        // allow us to stop encoding #results in OPC_CompleteMatch's table
+        // entry.
+        if (NodeToMatch->getNumValues() <= i ||
+            NodeToMatch->getValueType(i) == MVT::Other ||
+            NodeToMatch->getValueType(i) == MVT::Flag)
+          break;
+        assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+                NodeToMatch->getValueType(i) == MVT::iPTR ||
+                Res.getValueType() == MVT::iPTR ||
+                NodeToMatch->getValueType(i).getSizeInBits() ==
+                    Res.getValueType().getSizeInBits()) &&
+               "invalid replacement");
+        CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+      }
+
+      // If the root node defines a flag, add it to the flag nodes to update
+      // list.
+      if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag)
+        FlagResultNodesMatched.push_back(NodeToMatch);
+      
+      // Update chain and flag uses.
+      UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
+                           InputFlag, FlagResultNodesMatched, false);
+      
+      assert(NodeToMatch->use_empty() &&
+             "Didn't replace all uses of the node?");
+      
+      // FIXME: We just return here, which interacts correctly with SelectRoot
+      // above.  We should fix this to not return an SDNode* anymore.
+      return 0;
+    }
+    }
+    
+    // If the code reached this point, then the match failed.  See if there is
+    // another child to try in the current 'Scope', otherwise pop it until we
+    // find a case to check.
+    while (1) {
+      if (MatchScopes.empty()) {
+        CannotYetSelect(NodeToMatch);
+        return 0;
+      }
+
+      // Restore the interpreter state back to the point where the scope was
+      // formed.
+      MatchScope &LastScope = MatchScopes.back();
+      RecordedNodes.resize(LastScope.NumRecordedNodes);
+      NodeStack.clear();
+      NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+      N = NodeStack.back();
+
+      DEBUG(errs() << "  Match failed at index " << MatcherIndex
+                   << " continuing at " << LastScope.FailIndex << "\n");
+    
+      if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+        MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+      MatcherIndex = LastScope.FailIndex;
+      
+      InputChain = LastScope.InputChain;
+      InputFlag = LastScope.InputFlag;
+      if (!LastScope.HasChainNodesMatched)
+        ChainNodesMatched.clear();
+      if (!LastScope.HasFlagResultNodesMatched)
+        FlagResultNodesMatched.clear();
+
+      // Check to see what the offset is at the new MatcherIndex.  If it is zero
+      // we have reached the end of this scope, otherwise we have another child
+      // in the current scope to try.
+      unsigned NumToSkip = MatcherTable[MatcherIndex++];
+      if (NumToSkip & 128)
+        NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+      // If we have another child in this scope to match, update FailIndex and
+      // try it.
+      if (NumToSkip != 0) {
+        LastScope.FailIndex = MatcherIndex+NumToSkip;
+        break;
+      }
+      
+      // End of this scope, pop it and try the next child in the containing
+      // scope.
+      MatchScopes.pop_back();
+    }
+  }
+}
+    
+
+
 void SelectionDAGISel::CannotYetSelect(SDNode *N) {
   std::string msg;
   raw_string_ostream Msg(msg);
   Msg << "Cannot yet select: ";
-  N->printrFull(Msg, CurDAG);
+  
+  if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+      N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+      N->getOpcode() != ISD::INTRINSIC_VOID) {
+    N->printrFull(Msg, CurDAG);
+  } else {
+    bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+    unsigned iid =
+      cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+    if (iid < Intrinsic::num_intrinsics)
+      Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+    else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+      Msg << "target intrinsic %" << TII->getName(iid);
+    else
+      Msg << "unknown intrinsic #" << iid;
+  }
   llvm_report_error(Msg.str());
 }
 
-void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) {
-  dbgs() << "Cannot yet select: ";
-  unsigned iid =
-    cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() ==
-                                       MVT::Other))->getZExtValue();
-  if (iid < Intrinsic::num_intrinsics)
-    llvm_report_error("Cannot yet select: intrinsic %" +
-                      Intrinsic::getName((Intrinsic::ID)iid));
-  else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo())
-    llvm_report_error(Twine("Cannot yet select: target intrinsic %") +
-                      tii->getName(iid));
-}
-
 char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d74ec7e..8d0d884 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -28,6 +28,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include <ctype.h>
 using namespace llvm;
 
 namespace llvm {
@@ -540,6 +541,24 @@
   delete &TLOF;
 }
 
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
+  assert(isTypeLegal(VT));
+  switch (Op) {
+  default:
+    return false;
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM:
+    return true;
+  }
+}
+
+
 static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                        unsigned &NumIntermediates,
                                        EVT &RegisterVT,
@@ -1423,8 +1442,10 @@
   case ISD::TRUNCATE: {
     // Simplify the input, using demanded bit information, and compute the known
     // zero/one bits live out.
+    unsigned OperandBitWidth =
+      Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
     APInt TruncMask = NewMask;
-    TruncMask.zext(Op.getOperand(0).getValueSizeInBits());
+    TruncMask.zext(OperandBitWidth);
     if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
                              KnownZero, KnownOne, TLO, Depth+1))
       return true;
@@ -1435,15 +1456,14 @@
     // on the known demanded bits.
     if (Op.getOperand(0).getNode()->hasOneUse()) {
       SDValue In = Op.getOperand(0);
-      unsigned InBitWidth = In.getValueSizeInBits();
       switch (In.getOpcode()) {
       default: break;
       case ISD::SRL:
         // Shrink SRL by a constant if none of the high bits shifted in are
         // demanded.
         if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
-          APInt HighBits = APInt::getHighBitsSet(InBitWidth,
-                                                 InBitWidth - BitWidth);
+          APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+                                                 OperandBitWidth - BitWidth);
           HighBits = HighBits.lshr(ShAmt->getZExtValue());
           HighBits.trunc(BitWidth);
           
@@ -1589,7 +1609,7 @@
 
   // Fall back to ComputeMaskedBits to catch other known cases.
   EVT OpVT = Val.getValueType();
-  unsigned BitWidth = OpVT.getSizeInBits();
+  unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
   APInt Mask = APInt::getAllOnesValue(BitWidth);
   APInt KnownZero, KnownOne;
   DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
@@ -1698,7 +1718,7 @@
           SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
                                         Lod->getSrcValue(), 
                                         Lod->getSrcValueOffset() + bestOffset,
-                                        false, NewAlign);
+                                        false, false, NewAlign);
           return DAG.getSetCC(dl, VT, 
                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                                       DAG.getConstant(bestMask.trunc(bestWidth),
@@ -1757,7 +1777,7 @@
         break;   // todo, be more careful with signed comparisons
       }
     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
-                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
       EVT ExtDstTy = N0.getValueType();
@@ -1791,22 +1811,21 @@
                           Cond);
     } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
-      
       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
-      if (N0.getOpcode() == ISD::SETCC) {
+      if (N0.getOpcode() == ISD::SETCC &&
+          isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
         if (TrueWhenTrue)
-          return N0;
-        
+          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);        
         // Invert the condition.
         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
         CC = ISD::getSetCCInverse(CC, 
                                   N0.getOperand(0).getValueType().isInteger());
         return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
       }
-      
+
       if ((N0.getOpcode() == ISD::XOR ||
-            (N0.getOpcode() == ISD::AND && 
+           (N0.getOpcode() == ISD::AND && 
             N0.getOperand(0).getOpcode() == ISD::XOR &&
             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
           isa<ConstantSDNode>(N0.getOperand(1)) &&
@@ -1829,9 +1848,36 @@
                               N0.getOperand(0).getOperand(0),
                               N0.getOperand(1));
           }
+
           return DAG.getSetCC(dl, VT, Val, N1,
                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
         }
+      } else if (N1C->getAPIntValue() == 1 &&
+                 (VT == MVT::i1 ||
+                  getBooleanContents() == ZeroOrOneBooleanContent)) {
+        SDValue Op0 = N0;
+        if (Op0.getOpcode() == ISD::TRUNCATE)
+          Op0 = Op0.getOperand(0);
+
+        if ((Op0.getOpcode() == ISD::XOR) &&
+            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+          // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+          Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+          return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+                              Cond);
+        } else if (Op0.getOpcode() == ISD::AND &&
+                isa<ConstantSDNode>(Op0.getOperand(1)) &&
+                cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+          if (Op0.getValueType() != VT)
+            Op0 = DAG.getNode(ISD::AND, dl, VT,
+                          DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+                          DAG.getConstant(1, VT));
+          return DAG.getSetCC(dl, VT, Op0,
+                              DAG.getConstant(0, Op0.getValueType()),
+                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+        }
       }
     }