Check in LLVM r95781.
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
new file mode 100644
index 0000000..80c7d7c
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -0,0 +1,25 @@
+add_llvm_library(LLVMSelectionDAG
+  CallingConvLower.cpp
+  DAGCombiner.cpp
+  FastISel.cpp
+  FunctionLoweringInfo.cpp
+  InstrEmitter.cpp
+  LegalizeDAG.cpp
+  LegalizeFloatTypes.cpp
+  LegalizeIntegerTypes.cpp
+  LegalizeTypes.cpp
+  LegalizeTypesGeneric.cpp
+  LegalizeVectorOps.cpp
+  LegalizeVectorTypes.cpp
+  ScheduleDAGFast.cpp
+  ScheduleDAGList.cpp
+  ScheduleDAGRRList.cpp
+  ScheduleDAGSDNodes.cpp
+  SelectionDAG.cpp
+  SelectionDAGBuilder.cpp
+  SelectionDAGISel.cpp
+  SelectionDAGPrinter.cpp
+  TargetLowering.cpp
+  )
+
+target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen)
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
new file mode 100644
index 0000000..4e6c1fc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -0,0 +1,179 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm,
+                 SmallVector<CCValAssign, 16> &locs, LLVMContext &C)
+  : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+    TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) {
+  // No stack is used.
+  StackOffset = 0;
+  
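+  // One bit per register: round the register count up to a whole number of
+  // 32-bit words (e.g. 100 registers need ceil(100/32) = 4 words).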
+  UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
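+// For example, a 12-byte struct with 8-byte alignment gets a 12-byte slot at
+// the next stack offset that is a multiple of 8 (assuming MinSize/MinAlign
+// don't raise those values).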
+void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
+                          EVT LocVT, CCValAssign::LocInfo LocInfo,
+                          int MinSize, int MinAlign,
+                          ISD::ArgFlagsTy ArgFlags) {
+  unsigned Align = ArgFlags.getByValAlign();
+  unsigned Size  = ArgFlags.getByValSize();
+  if (MinSize > (int)Size)
+    Size = MinSize;
+  if (MinAlign > (int)Align)
+    Align = MinAlign;
+  unsigned Offset = AllocateStack(Size, Align);
+
+  addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
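+/// For example, on x86 marking EAX allocated also marks AX, AH, and AL, so no
+/// overlapping register is handed out twice.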
+void CCState::MarkAllocated(unsigned Reg) {
+  UsedRegs[Reg/32] |= 1 << (Reg&31);
+  
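+  // getAliasSet returns a 0-terminated list, so the loop below stops when it
+  // reaches the sentinel register 0.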
+  if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
+    for (; (Reg = *RegAliases); ++RegAliases)
+      UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an array of argument values,
+/// incorporating info about the formals into this state.
+void
+CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+                                CCAssignFn Fn) {
+  unsigned NumArgs = Ins.size();
+
+  for (unsigned i = 0; i != NumArgs; ++i) {
+    EVT ArgVT = Ins[i].VT;
+    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+      dbgs() << "Formal argument #" << i << " has unhandled type "
+             << ArgVT.getEVTString();
+#endif
+      llvm_unreachable(0);
+    }
+  }
+}
+
+/// CheckReturn - Analyze the return values of a function, returning true if
+/// the return can be performed without sret-demotion, and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys,
+                          const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+                          CCAssignFn Fn) {
+  // Determine which register each value should be copied into.
+  for (unsigned i = 0, e = OutTys.size(); i != e; ++i) {
+    EVT VT = OutTys[i];
+    ISD::ArgFlagsTy ArgFlags = ArgsFlags[i];
+    if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+      return false;
+  }
+  return true;
+}
+
+/// AnalyzeReturn - Analyze the returned values of a return,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                            CCAssignFn Fn) {
+  // Determine which register each value should be copied into.
+  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+    EVT VT = Outs[i].Val.getValueType();
+    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+    if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+      dbgs() << "Return operand #" << i << " has unhandled type "
+             << VT.getEVTString();
+#endif
+      llvm_unreachable(0);
+    }
+  }
+}
+
+
+/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                  CCAssignFn Fn) {
+  unsigned NumOps = Outs.size();
+  for (unsigned i = 0; i != NumOps; ++i) {
+    EVT ArgVT = Outs[i].Val.getValueType();
+    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+      dbgs() << "Call operand #" << i << " has unhandled type "
+             << ArgVT.getEVTString();
+#endif
+      llvm_unreachable(0);
+    }
+  }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+                                  SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+                                  CCAssignFn Fn) {
+  unsigned NumOps = ArgVTs.size();
+  for (unsigned i = 0; i != NumOps; ++i) {
+    EVT ArgVT = ArgVTs[i];
+    ISD::ArgFlagsTy ArgFlags = Flags[i];
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+      dbgs() << "Call operand #" << i << " has unhandled type "
+             << ArgVT.getEVTString();
+#endif
+      llvm_unreachable(0);
+    }
+  }
+}
+
+/// AnalyzeCallResult - Analyze the return values of a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+                                CCAssignFn Fn) {
+  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+    EVT VT = Ins[i].VT;
+    ISD::ArgFlagsTy Flags = Ins[i].Flags;
+    if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+      dbgs() << "Call result #" << i << " has unhandled type "
+             << VT.getEVTString();
+#endif
+      llvm_unreachable(0);
+    }
+  }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
+  if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+#ifndef NDEBUG
+    dbgs() << "Call result has unhandled type "
+           << VT.getEVTString();
+#endif
+    llvm_unreachable(0);
+  }
+}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 0000000..9189e71
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,6454 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined   , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
+
+namespace {
+  static cl::opt<bool>
+    CombinerAA("combiner-alias-analysis", cl::Hidden,
+               cl::desc("Turn on alias analysis during testing"));
+
+  static cl::opt<bool>
+    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+               cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+  class DAGCombiner {
+    SelectionDAG &DAG;
+    const TargetLowering &TLI;
+    CombineLevel Level;
+    CodeGenOpt::Level OptLevel;
+    bool LegalOperations;
+    bool LegalTypes;
+
+    // Worklist of all of the nodes that need to be simplified.
+    std::vector<SDNode*> WorkList;
+
+    // AA - Used for DAG load/store alias analysis.
+    AliasAnalysis &AA;
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users of
+    /// the instruction to the work list because they might get more simplified
+    /// now.
+    ///
+    void AddUsersToWorkList(SDNode *N) {
+      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+           UI != UE; ++UI)
+        AddToWorkList(*UI);
+    }
+
+    /// visit - call the node-specific routine that knows how to fold each
+    /// particular type of node.
+    SDValue visit(SDNode *N);
+
+  public:
+    /// AddToWorkList - Add to the work list, making sure its instance is at
+    /// the back (next to be processed).
+    void AddToWorkList(SDNode *N) {
+      removeFromWorkList(N);
+      WorkList.push_back(N);
+    }
+
+    /// removeFromWorkList - remove all instances of N from the worklist.
+    ///
+    void removeFromWorkList(SDNode *N) {
+      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+                     WorkList.end());
+    }
+
+    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+                      bool AddTo = true);
+
+    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+      return CombineTo(N, &Res, 1, AddTo);
+    }
+
+    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+                      bool AddTo = true) {
+      SDValue To[] = { Res0, Res1 };
+      return CombineTo(N, To, 2, AddTo);
+    }
+
+    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+  private:
+
+    /// SimplifyDemandedBits - Check the specified integer node value to see if
+    /// it can be simplified or if things it uses can be simplified by bit
+    /// propagation.  If so, return true.
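+    /// For example, in (and x, 255) only the low 8 bits of x are demanded, so
+    /// computations feeding x's upper bits may be simplified away.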
+    bool SimplifyDemandedBits(SDValue Op) {
+      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+      APInt Demanded = APInt::getAllOnesValue(BitWidth);
+      return SimplifyDemandedBits(Op, Demanded);
+    }
+
+    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+    bool CombineToPreIndexedLoadStore(SDNode *N);
+    bool CombineToPostIndexedLoadStore(SDNode *N);
+
+
+    /// combine - call the node-specific routine that knows how to fold each
+    /// particular type of node. If that doesn't do anything, try the
+    /// target-specific DAG combines.
+    SDValue combine(SDNode *N);
+
+    // Visitation implementation - Implement dag node combining for different
+    // node types.  The semantics are as follows:
+    // Return Value:
+    //   SDValue.getNode() == 0 - No change was made
+    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
+    //   otherwise              - N should be replaced by the returned Operand.
+    //
+    SDValue visitTokenFactor(SDNode *N);
+    SDValue visitMERGE_VALUES(SDNode *N);
+    SDValue visitADD(SDNode *N);
+    SDValue visitSUB(SDNode *N);
+    SDValue visitADDC(SDNode *N);
+    SDValue visitADDE(SDNode *N);
+    SDValue visitMUL(SDNode *N);
+    SDValue visitSDIV(SDNode *N);
+    SDValue visitUDIV(SDNode *N);
+    SDValue visitSREM(SDNode *N);
+    SDValue visitUREM(SDNode *N);
+    SDValue visitMULHU(SDNode *N);
+    SDValue visitMULHS(SDNode *N);
+    SDValue visitSMUL_LOHI(SDNode *N);
+    SDValue visitUMUL_LOHI(SDNode *N);
+    SDValue visitSDIVREM(SDNode *N);
+    SDValue visitUDIVREM(SDNode *N);
+    SDValue visitAND(SDNode *N);
+    SDValue visitOR(SDNode *N);
+    SDValue visitXOR(SDNode *N);
+    SDValue SimplifyVBinOp(SDNode *N);
+    SDValue visitSHL(SDNode *N);
+    SDValue visitSRA(SDNode *N);
+    SDValue visitSRL(SDNode *N);
+    SDValue visitCTLZ(SDNode *N);
+    SDValue visitCTTZ(SDNode *N);
+    SDValue visitCTPOP(SDNode *N);
+    SDValue visitSELECT(SDNode *N);
+    SDValue visitSELECT_CC(SDNode *N);
+    SDValue visitSETCC(SDNode *N);
+    SDValue visitSIGN_EXTEND(SDNode *N);
+    SDValue visitZERO_EXTEND(SDNode *N);
+    SDValue visitANY_EXTEND(SDNode *N);
+    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+    SDValue visitTRUNCATE(SDNode *N);
+    SDValue visitBIT_CONVERT(SDNode *N);
+    SDValue visitBUILD_PAIR(SDNode *N);
+    SDValue visitFADD(SDNode *N);
+    SDValue visitFSUB(SDNode *N);
+    SDValue visitFMUL(SDNode *N);
+    SDValue visitFDIV(SDNode *N);
+    SDValue visitFREM(SDNode *N);
+    SDValue visitFCOPYSIGN(SDNode *N);
+    SDValue visitSINT_TO_FP(SDNode *N);
+    SDValue visitUINT_TO_FP(SDNode *N);
+    SDValue visitFP_TO_SINT(SDNode *N);
+    SDValue visitFP_TO_UINT(SDNode *N);
+    SDValue visitFP_ROUND(SDNode *N);
+    SDValue visitFP_ROUND_INREG(SDNode *N);
+    SDValue visitFP_EXTEND(SDNode *N);
+    SDValue visitFNEG(SDNode *N);
+    SDValue visitFABS(SDNode *N);
+    SDValue visitBRCOND(SDNode *N);
+    SDValue visitBR_CC(SDNode *N);
+    SDValue visitLOAD(SDNode *N);
+    SDValue visitSTORE(SDNode *N);
+    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+    SDValue visitBUILD_VECTOR(SDNode *N);
+    SDValue visitCONCAT_VECTORS(SDNode *N);
+    SDValue visitVECTOR_SHUFFLE(SDNode *N);
+
+    SDValue XformToShuffleWithZero(SDNode *N);
+    SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
+
+    SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+
+    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+    SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
+    SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
+                             SDValue N3, ISD::CondCode CC,
+                             bool NotExtCompare = false);
+    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+                          DebugLoc DL, bool foldBooleans = true);
+    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+                                         unsigned HiOp);
+    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+    SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
+    SDValue BuildSDIV(SDNode *N);
+    SDValue BuildUDIV(SDNode *N);
+    SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
+    SDValue ReduceLoadWidth(SDNode *N);
+    SDValue ReduceLoadOpStoreWidth(SDNode *N);
+
+    SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+
+    /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+    /// looking for aliasing nodes and adding them to the Aliases vector.
+    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+                          SmallVector<SDValue, 8> &Aliases);
+
+    /// isAlias - Return true if there is any possibility that the two addresses
+    /// overlap.
+    bool isAlias(SDValue Ptr1, int64_t Size1,
+                 const Value *SrcValue1, int SrcValueOffset1,
+                 unsigned SrcValueAlign1,
+                 SDValue Ptr2, int64_t Size2,
+                 const Value *SrcValue2, int SrcValueOffset2,
+                 unsigned SrcValueAlign2) const;
+
+    /// FindAliasInfo - Extracts the relevant alias information from the memory
+    /// node.  Returns true if the operand was a load.
+    bool FindAliasInfo(SDNode *N,
+                       SDValue &Ptr, int64_t &Size,
+                       const Value *&SrcValue, int &SrcValueOffset,
+                       unsigned &SrcValueAlignment) const;
+
+    /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+    /// looking for a better chain (aliasing node).
+    SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+    /// getShiftAmountTy - Returns a type large enough to hold any valid
+    /// shift amount - before type legalization these can be huge.
+    EVT getShiftAmountTy() {
+      return LegalTypes ?  TLI.getShiftAmountTy() : TLI.getPointerTy();
+    }
+
+  public:
+    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+      : DAG(D),
+        TLI(D.getTargetLoweringInfo()),
+        Level(Unrestricted),
+        OptLevel(OL),
+        LegalOperations(false),
+        LegalTypes(false),
+        AA(A) {}
+
+    /// Run - runs the dag combiner on all nodes in the work list
+    void Run(CombineLevel AtLevel);
+  };
+}
+
+
+namespace {
+/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
+  DAGCombiner &DC;
+public:
+  explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
+
+  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    DC.removeFromWorkList(N);
+  }
+
+  virtual void NodeUpdated(SDNode *N) {
+    // Ignore updates.
+  }
+};
+}
+
+//===----------------------------------------------------------------------===//
+//  TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+  ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, or 2 if we
+/// can compute the negated form more cheaply than the expression itself.
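+/// For example, (fneg X) is always negatible at a profit (cost 2), since
+/// negating it just peels the fneg node off.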
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+                               unsigned Depth = 0) {
+  // No compile time optimizations on this type.
+  if (Op.getValueType() == MVT::ppcf128)
+    return 0;
+
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG) return 2;
+
+  // Don't allow anything with multiple uses.
+  if (!Op.hasOneUse()) return 0;
+
+  // Don't recurse exponentially.
+  if (Depth > 6) return 0;
+
+  switch (Op.getOpcode()) {
+  default: return 0;
+  case ISD::ConstantFP:
+    // Don't invert constant FP values after legalize.  The negated constant
+    // isn't necessarily legal.
+    return LegalOperations ? 0 : 1;
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    if (!UnsafeFPMath) return 0;
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return V;
+    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros.
+    if (!UnsafeFPMath) return 0;
+
+    // fold (fneg (fsub A, B)) -> (fsub B, A)
+    return 1;
+
+  case ISD::FMUL:
+  case ISD::FDIV:
+    if (HonorSignDependentRoundingFPMath()) return 0;
+
+    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return V;
+
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FSIN:
+    return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+  }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returned a non-zero value,
+/// this function returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+                                    bool LegalOperations, unsigned Depth = 0) {
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+  // Don't allow anything with multiple uses.
+  assert(Op.hasOneUse() && "Unknown reuse!");
+
+  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+  switch (Op.getOpcode()) {
+  default: llvm_unreachable("Unknown code");
+  case ISD::ConstantFP: {
+    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+    V.changeSign();
+    return DAG.getConstantFP(V, Op.getValueType());
+  }
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    assert(UnsafeFPMath);
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+    if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG,
+                                              LegalOperations, Depth+1),
+                         Op.getOperand(1));
+    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(1), DAG,
+                                            LegalOperations, Depth+1),
+                       Op.getOperand(0));
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros.
+    assert(UnsafeFPMath);
+
+    // fold (fneg (fsub 0, B)) -> B
+    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+      if (N0CFP->getValueAPF().isZero())
+        return Op.getOperand(1);
+
+    // fold (fneg (fsub A, B)) -> (fsub B, A)
+    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(0));
+
+  case ISD::FMUL:
+  case ISD::FDIV:
+    assert(!HonorSignDependentRoundingFPMath());
+
+    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+    if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG,
+                                              LegalOperations, Depth+1),
+                         Op.getOperand(1));
+
+    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+                       Op.getOperand(0),
+                       GetNegatedExpression(Op.getOperand(1), DAG,
+                                            LegalOperations, Depth+1));
+
+  case ISD::FP_EXTEND:
+  case ISD::FSIN:
+    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(0), DAG,
+                                            LegalOperations, Depth+1));
+  case ISD::FP_ROUND:
+      return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG,
+                                              LegalOperations, Depth+1),
+                         Op.getOperand(1));
+  }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking.  This simplifies life a
+// bit for the callers.
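+//
+// For example, (select_cc lhs, rhs, 1, 0, cc) computes the same value as
+// (setcc lhs, rhs, cc), so callers can treat both shapes uniformly.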
+static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+                              SDValue &CC) {
+  if (N.getOpcode() == ISD::SETCC) {
+    LHS = N.getOperand(0);
+    RHS = N.getOperand(1);
+    CC  = N.getOperand(2);
+    return true;
+  }
+  if (N.getOpcode() == ISD::SELECT_CC &&
+      N.getOperand(2).getOpcode() == ISD::Constant &&
+      N.getOperand(3).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
+      cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+    LHS = N.getOperand(0);
+    RHS = N.getOperand(1);
+    CC  = N.getOperand(4);
+    return true;
+  }
+  return false;
+}
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use.  If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDValue N) {
+  SDValue N0, N1, N2;
+  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+    return true;
+  return false;
+}
+
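+/// ReassociateOps - Reassociate commutative operations to expose constant
+/// folding opportunities, e.g. (add (add x, c1), c2) -> (add x, c1+c2).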
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
+                                    SDValue N0, SDValue N1) {
+  EVT VT = N0.getValueType();
+  if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+    if (isa<ConstantSDNode>(N1)) {
+      // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+      SDValue OpNode =
+        DAG.FoldConstantArithmetic(Opc, VT,
+                                   cast<ConstantSDNode>(N0.getOperand(1)),
+                                   cast<ConstantSDNode>(N1));
+      return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+    } else if (N0.hasOneUse()) {
+      // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+      SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+                                   N0.getOperand(0), N1);
+      AddToWorkList(OpNode.getNode());
+      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+    }
+  }
+
+  if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+    if (isa<ConstantSDNode>(N0)) {
+      // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+      SDValue OpNode =
+        DAG.FoldConstantArithmetic(Opc, VT,
+                                   cast<ConstantSDNode>(N1.getOperand(1)),
+                                   cast<ConstantSDNode>(N0));
+      return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+    } else if (N1.hasOneUse()) {
+      // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+      SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+                                   N1.getOperand(0), N0);
+      AddToWorkList(OpNode.getNode());
+      return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+                               bool AddTo) {
+  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+  ++NodesCombined;
+  DEBUG(dbgs() << "\nReplacing.1 ";
+        N->dump(&DAG);
+        dbgs() << "\nWith: ";
+        To[0].getNode()->dump(&DAG);
+        dbgs() << " and " << NumTo-1 << " other values\n";
+        for (unsigned i = 0, e = NumTo; i != e; ++i)
+          assert((!To[i].getNode() ||
+                  N->getValueType(i) == To[i].getValueType()) &&
+                 "Cannot combine value to value of different type!"));
+  WorkListRemover DeadNodes(*this);
+  DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
+
+  if (AddTo) {
+    // Push the new nodes and any users onto the worklist
+    for (unsigned i = 0, e = NumTo; i != e; ++i) {
+      if (To[i].getNode()) {
+        AddToWorkList(To[i].getNode());
+        AddUsersToWorkList(To[i].getNode());
+      }
+    }
+  }
+
+  // Finally, if the node is now dead, remove it from the graph.  The node
+  // may not be dead if the replacement process recursively simplified to
+  // something else needing this node.
+  if (N->use_empty()) {
+    // Nodes can be reintroduced into the worklist.  Make sure we do not
+    // process a node that has been replaced.
+    removeFromWorkList(N);
+
+    // Finally, since the node is now dead, remove it from the graph.
+    DAG.DeleteNode(N);
+  }
+  return SDValue(N, 0);
+}
+
+void
+DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
+                                                                          TLO) {
+  // Replace all uses.  If any nodes become isomorphic to other nodes and
+  // are deleted, make sure to remove them from our worklist.
+  WorkListRemover DeadNodes(*this);
+  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+  // Push the new node and any (possibly new) users onto the worklist.
+  AddToWorkList(TLO.New.getNode());
+  AddUsersToWorkList(TLO.New.getNode());
+
+  // Finally, if the node is now dead, remove it from the graph.  The node
+  // may not be dead if the replacement process recursively simplified to
+  // something else needing this node.
+  if (TLO.Old.getNode()->use_empty()) {
+    removeFromWorkList(TLO.Old.getNode());
+
+    // If the operands of this node are only used by the node, they will now
+    // be dead.  Make sure to visit them first to delete dead nodes early.
+    for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+      if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+        AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
+    DAG.DeleteNode(TLO.Old.getNode());
+  }
+}
+
+/// SimplifyDemandedBits - Check the specified integer node value to see if
+/// it can be simplified or if things it uses can be simplified by bit
+/// propagation.  If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+  TargetLowering::TargetLoweringOpt TLO(DAG);
+  APInt KnownZero, KnownOne;
+  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+    return false;
+
+  // Revisit the node.
+  AddToWorkList(Op.getNode());
+
+  // Replace the old value with the new one.
+  ++NodesCombined;
+  DEBUG(dbgs() << "\nReplacing.2 "; 
+        TLO.Old.getNode()->dump(&DAG);
+        dbgs() << "\nWith: ";
+        TLO.New.getNode()->dump(&DAG);
+        dbgs() << '\n');
+
+  CommitTargetLoweringOpt(TLO);
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+//  Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+  // Set the instance variables, so that the various visit routines may use them.
+  Level = AtLevel;
+  LegalOperations = Level >= NoIllegalOperations;
+  LegalTypes = Level >= NoIllegalTypes;
+
+  // Add all the dag nodes to the worklist.
+  WorkList.reserve(DAG.allnodes_size());
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I)
+    WorkList.push_back(I);
+
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+
+  // The root of the dag may dangle to deleted nodes until the dag combiner is
+  // done.  Set it to null to avoid confusion.
+  DAG.setRoot(SDValue());
+
+  // While the worklist isn't empty, inspect the node at the end of it and
+  // try to combine it.
+  while (!WorkList.empty()) {
+    SDNode *N = WorkList.back();
+    WorkList.pop_back();
+
+    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
+    // N is deleted from the DAG, since they too may now be dead or may have a
+    // reduced number of uses, allowing other xforms.
+    if (N->use_empty() && N != &Dummy) {
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        AddToWorkList(N->getOperand(i).getNode());
+
+      DAG.DeleteNode(N);
+      continue;
+    }
+
+    SDValue RV = combine(N);
+
+    if (RV.getNode() == 0)
+      continue;
+
+    ++NodesCombined;
+
+    // If we get back the same node we passed in, rather than a new node or
+    // zero, we know that the node must have defined multiple values and
+    // CombineTo was used.  Since CombineTo takes care of the worklist
+    // mechanics for us, we have no work to do in this case.
+    if (RV.getNode() == N)
+      continue;
+
+    assert(N->getOpcode() != ISD::DELETED_NODE &&
+           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+           "Node was deleted but visit returned new node!");
+
+    DEBUG(dbgs() << "\nReplacing.3 "; 
+          N->dump(&DAG);
+          dbgs() << "\nWith: ";
+          RV.getNode()->dump(&DAG);
+          dbgs() << '\n');
+    WorkListRemover DeadNodes(*this);
+    if (N->getNumValues() == RV.getNode()->getNumValues())
+      DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+    else {
+      assert(N->getValueType(0) == RV.getValueType() &&
+             N->getNumValues() == 1 && "Type mismatch");
+      SDValue OpV = RV;
+      DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+    }
+
+    // Push the new node and any users onto the worklist
+    AddToWorkList(RV.getNode());
+    AddUsersToWorkList(RV.getNode());
+
+    // Add any uses of the old node to the worklist in case this node is the
+    // last one that uses them.  They may become dead after this node is
+    // deleted.
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      AddToWorkList(N->getOperand(i).getNode());
+
+    // Finally, if the node is now dead, remove it from the graph.  The node
+    // may not be dead if the replacement process recursively simplified to
+    // something else needing this node.
+    if (N->use_empty()) {
+      // Nodes can be reintroduced into the worklist.  Make sure we do not
+      // process a node that has been replaced.
+      removeFromWorkList(N);
+
+      // Finally, since the node is now dead, remove it from the graph.
+      DAG.DeleteNode(N);
+    }
+  }
+
+  // If the root changed (e.g. it was a dead load), update the root.
+  DAG.setRoot(Dummy.getValue());
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+  switch(N->getOpcode()) {
+  default: break;
+  case ISD::TokenFactor:        return visitTokenFactor(N);
+  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
+  case ISD::ADD:                return visitADD(N);
+  case ISD::SUB:                return visitSUB(N);
+  case ISD::ADDC:               return visitADDC(N);
+  case ISD::ADDE:               return visitADDE(N);
+  case ISD::MUL:                return visitMUL(N);
+  case ISD::SDIV:               return visitSDIV(N);
+  case ISD::UDIV:               return visitUDIV(N);
+  case ISD::SREM:               return visitSREM(N);
+  case ISD::UREM:               return visitUREM(N);
+  case ISD::MULHU:              return visitMULHU(N);
+  case ISD::MULHS:              return visitMULHS(N);
+  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
+  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
+  case ISD::SDIVREM:            return visitSDIVREM(N);
+  case ISD::UDIVREM:            return visitUDIVREM(N);
+  case ISD::AND:                return visitAND(N);
+  case ISD::OR:                 return visitOR(N);
+  case ISD::XOR:                return visitXOR(N);
+  case ISD::SHL:                return visitSHL(N);
+  case ISD::SRA:                return visitSRA(N);
+  case ISD::SRL:                return visitSRL(N);
+  case ISD::CTLZ:               return visitCTLZ(N);
+  case ISD::CTTZ:               return visitCTTZ(N);
+  case ISD::CTPOP:              return visitCTPOP(N);
+  case ISD::SELECT:             return visitSELECT(N);
+  case ISD::SELECT_CC:          return visitSELECT_CC(N);
+  case ISD::SETCC:              return visitSETCC(N);
+  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
+  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
+  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
+  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
+  case ISD::TRUNCATE:           return visitTRUNCATE(N);
+  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N);
+  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
+  case ISD::FADD:               return visitFADD(N);
+  case ISD::FSUB:               return visitFSUB(N);
+  case ISD::FMUL:               return visitFMUL(N);
+  case ISD::FDIV:               return visitFDIV(N);
+  case ISD::FREM:               return visitFREM(N);
+  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
+  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
+  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
+  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
+  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
+  case ISD::FP_ROUND:           return visitFP_ROUND(N);
+  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
+  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
+  case ISD::FNEG:               return visitFNEG(N);
+  case ISD::FABS:               return visitFABS(N);
+  case ISD::BRCOND:             return visitBRCOND(N);
+  case ISD::BR_CC:              return visitBR_CC(N);
+  case ISD::LOAD:               return visitLOAD(N);
+  case ISD::STORE:              return visitSTORE(N);
+  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
+  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
+  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
+  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+  SDValue RV = visit(N);
+
+  // If nothing happened, try a target-specific DAG combine.
+  if (RV.getNode() == 0) {
+    assert(N->getOpcode() != ISD::DELETED_NODE &&
+           "Node was deleted but visit returned NULL!");
+
+    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+      // Expose the DAG combiner to the target combiner impls.
+      TargetLowering::DAGCombinerInfo
+        DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+
+      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+    }
+  }
+
+  // If N is a commutative binary node, try commuting it to enable more
+  // sdisel CSE.
+  if (RV.getNode() == 0 &&
+      SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+      N->getNumValues() == 1) {
+    SDValue N0 = N->getOperand(0);
+    SDValue N1 = N->getOperand(1);
+
+    // Constant operands are canonicalized to RHS.
+    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+      SDValue Ops[] = { N1, N0 };
+      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+                                            Ops, 2);
+      if (CSENode)
+        return SDValue(CSENode, 0);
+    }
+  }
+
+  return RV;
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null sd operand.
+static SDValue getInputChainForNode(SDNode *N) {
+  if (unsigned NumOps = N->getNumOperands()) {
+    if (N->getOperand(0).getValueType() == MVT::Other)
+      return N->getOperand(0);
+    else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+      return N->getOperand(NumOps-1);
+    for (unsigned i = 1; i < NumOps-1; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other)
+        return N->getOperand(i);
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+  // If N has two operands, where one has an input chain equal to the other,
+  // the 'other' chain is redundant.
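+  // e.g. in TokenFactor(Op, X) where Op's own input chain is X, Op already
+  // orders after X, so the token factor can be replaced by Op.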
+  if (N->getNumOperands() == 2) {
+    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+      return N->getOperand(0);
+    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+      return N->getOperand(1);
+  }
+
+  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
+  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
+  SmallPtrSet<SDNode*, 16> SeenOps;
+  bool Changed = false;             // If we should replace this token factor.
+
+  // Start out with this token factor.
+  TFs.push_back(N);
+
+  // Iterate through token factors.  The TFs list grows when new token
+  // factors are encountered.
+  for (unsigned i = 0; i < TFs.size(); ++i) {
+    SDNode *TF = TFs[i];
+
+    // Check each of the operands.
+    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+      SDValue Op = TF->getOperand(i);
+
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list. They are
+        // redundant.
+        Changed = true;
+        break;
+
+      case ISD::TokenFactor:
+        if (Op.hasOneUse() &&
+            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+          // Queue up for processing.
+          TFs.push_back(Op.getNode());
+          // Clean up in case the token factor is removed.
+          AddToWorkList(Op.getNode());
+          Changed = true;
+          break;
+        }
+        // Fall thru
+
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.getNode()))
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+  
+  SDValue Result;
+
+  // If we've changed things around then replace token factor.
+  if (Changed) {
+    if (Ops.empty()) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      // New and improved token factor.
+      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                           MVT::Other, &Ops[0], Ops.size());
+    }
+
+    // Don't add users to work list.
+    return CombineTo(N, Result, false);
+  }
+
+  return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+  WorkListRemover DeadNodes(*this);
+  // Replacing results may cause a different MERGE_VALUES to suddenly
+  // be CSE'd with N, and carry its uses with it. Iterate until no
+  // uses remain, to ensure that the node can be safely deleted.
+  do {
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+                                    &DeadNodes);
+  } while (!N->use_empty());
+  removeFromWorkList(N);
+  DAG.DeleteNode(N);
+  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+}
+
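+/// combineShlAddConstant - Given N0 = (shl (add x, c1), c2) with c1 and c2
+/// constant, distribute the shift so the constants can fold further:
+/// (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y).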
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+                              SelectionDAG &DAG) {
+  EVT VT = N0.getValueType();
+  SDValue N00 = N0.getOperand(0);
+  SDValue N01 = N0.getOperand(1);
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+      isa<ConstantSDNode>(N00.getOperand(1))) {
+    // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+    N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+                     DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+                                 N00.getOperand(0), N01),
+                     DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+                                 N00.getOperand(1), N01));
+    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (add x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (add c1, c2) -> c1+c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (add x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (add Sym, c) -> Sym+c
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+        GA->getOpcode() == ISD::GlobalAddress)
+      return DAG.getGlobalAddress(GA->getGlobal(), VT,
+                                  GA->getOffset() +
+                                    (uint64_t)N1C->getSExtValue());
+  // fold ((c1-A)+c2) -> (c1+c2)-A
+  if (N1C && N0.getOpcode() == ISD::SUB)
+    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                         DAG.getConstant(N1C->getAPIntValue()+
+                                         N0C->getAPIntValue(), VT),
+                         N0.getOperand(1));
+  // reassociate add
+  SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+  if (RADD.getNode() != 0)
+    return RADD;
+  // fold ((0-A) + B) -> B-A
+  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+  // fold (A + (0-B)) -> A-B
+  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+  // fold (A+(B-A)) -> B
+  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+    return N1.getOperand(0);
+  // fold ((B-A)+A) -> B
+  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+    return N0.getOperand(0);
+  // fold (A+(B-(A+C))) to (B-C)
+  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+      N0 == N1.getOperand(1).getOperand(0))
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+                       N1.getOperand(1).getOperand(1));
+  // fold (A+(B-(C+A))) to (B-C)
+  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+      N0 == N1.getOperand(1).getOperand(1))
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+                       N1.getOperand(1).getOperand(0));
+  // fold (A+((B-A)+or-C)) to (B+or-C)
+  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+      N1.getOperand(0).getOpcode() == ISD::SUB &&
+      N0 == N1.getOperand(0).getOperand(1))
+    return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+                       N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    SDValue N10 = N1.getOperand(0);
+    SDValue N11 = N1.getOperand(1);
+
+    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+                         DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+  }
+
+  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (a+b) -> (a|b) iff a and b share no bits.
+  if (VT.isInteger() && !VT.isVector()) {
+    APInt LHSZero, LHSOne;
+    APInt RHSZero, RHSOne;
+    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+    if (LHSZero.getBoolValue()) {
+      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
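+      // Either way no bit position can produce a carry, so OR and ADD agree.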
+      if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+        return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+    }
+  }
+
+  // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+    if (Result.getNode()) return Result;
+  }
+  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+    if (Result.getNode()) return Result;
+  }
+
+  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+  if (N1.getOpcode() == ISD::SHL &&
+      N1.getOperand(0).getOpcode() == ISD::SUB)
+    if (ConstantSDNode *C =
+          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
+      if (C->getAPIntValue() == 0)
+        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
+                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                                       N1.getOperand(0).getOperand(1),
+                                       N1.getOperand(1)));
+  if (N0.getOpcode() == ISD::SHL &&
+      N0.getOperand(0).getOpcode() == ISD::SUB)
+    if (ConstantSDNode *C =
+          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
+      if (C->getAPIntValue() == 0)
+        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
+                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                                       N0.getOperand(0).getOperand(1),
+                                       N0.getOperand(1)));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // If the flag result is dead, turn this into an ADD.
+  if (N->hasNUsesOfValue(0, 1))
+    return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+                     DAG.getNode(ISD::CARRY_FALSE,
+                                 N->getDebugLoc(), MVT::Flag));
+
+  // canonicalize constant to RHS.
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+  // fold (addc x, 0) -> x + no carry out
+  if (N1C && N1C->isNullValue())
+    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+                                        N->getDebugLoc(), MVT::Flag));
+
+  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+  APInt LHSZero, LHSOne;
+  APInt RHSZero, RHSOne;
+  APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+  DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+  if (LHSZero.getBoolValue()) {
+    DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+    if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+        (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+      return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
+                       DAG.getNode(ISD::CARRY_FALSE,
+                                   N->getDebugLoc(), MVT::Flag));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue CarryIn = N->getOperand(2);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
+                       N1, N0, CarryIn);
+
+  // fold (adde x, y, false) -> (addc x, y)
+  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  EVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (sub x, x) -> 0
+  if (N0 == N1)
+    return DAG.getConstant(0, N->getValueType(0));
+  // fold (sub c1, c2) -> c1-c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+  // fold (sub x, c) -> (add x, -c)
+  if (N1C)
+    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
+                       DAG.getConstant(-N1C->getAPIntValue(), VT));
+  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+  if (N0C && N0C->isAllOnesValue())
+    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+  // fold (A+B)-A -> B
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+    return N0.getOperand(1);
+  // fold (A+B)-B -> A
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+    return N0.getOperand(0);
+  // fold ((A+(B+or-C))-B) -> A+or-C
+  if (N0.getOpcode() == ISD::ADD &&
+      (N0.getOperand(1).getOpcode() == ISD::SUB ||
+       N0.getOperand(1).getOpcode() == ISD::ADD) &&
+      N0.getOperand(1).getOperand(0) == N1)
+    return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
+                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
+  // fold ((A+(C+B))-B) -> A+C
+  if (N0.getOpcode() == ISD::ADD &&
+      N0.getOperand(1).getOpcode() == ISD::ADD &&
+      N0.getOperand(1).getOperand(1) == N1)
+    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
+  // fold ((A-(B-C))-C) -> A-B
+  if (N0.getOpcode() == ISD::SUB &&
+      N0.getOperand(1).getOpcode() == ISD::SUB &&
+      N0.getOperand(1).getOperand(1) == N1)
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+  // If either operand of a sub is undef, the result is undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  // If the relocation model supports it, consider symbol offsets.
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+      // fold (sub Sym, c) -> Sym-c
+      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+        return DAG.getGlobalAddress(GA->getGlobal(), VT,
+                                    GA->getOffset() -
+                                      (uint64_t)N1C->getSExtValue());
+      // fold (sub Sym+c1, Sym+c2) -> c1-c2
+      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+        if (GA->getGlobal() == GB->getGlobal())
+          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+                                 VT);
+    }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (mul x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (mul c1, c2) -> c1*c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+  // fold (mul x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mul x, -1) -> 0-x
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       DAG.getConstant(0, VT), N0);
+  // fold (mul x, (1 << c)) -> x << c
+  if (N1C && N1C->getAPIntValue().isPowerOf2())
+    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
+                                       getShiftAmountTy()));
+  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+  if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
+    unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
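+    // e.g. for (mul x, -8), Log2Val == 3 and we emit (sub 0, (shl x, 3)).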
+    // FIXME: If the input is something that is easily negated (e.g. a
+    // single-use add), we should put the negate there.
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       DAG.getConstant(0, VT),
+                       DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+                            DAG.getConstant(Log2Val, getShiftAmountTy())));
+  }
+  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+  if (N1C && N0.getOpcode() == ISD::SHL &&
+      isa<ConstantSDNode>(N0.getOperand(1))) {
+    SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                             N1, N0.getOperand(1));
+    AddToWorkList(C3.getNode());
+    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+                       N0.getOperand(0), C3);
+  }
+
+  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+  // use.
+  {
+    SDValue Sh(0,0), Y(0,0);
+    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
+    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+        N0.getNode()->hasOneUse()) {
+      Sh = N0; Y = N1;
+    } else if (N1.getOpcode() == ISD::SHL &&
+               isa<ConstantSDNode>(N1.getOperand(1)) &&
+               N1.getNode()->hasOneUse()) {
+      Sh = N1; Y = N0;
+    }
+
+    if (Sh.getNode()) {
+      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+                                Sh.getOperand(0), Y);
+      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                         Mul, Sh.getOperand(1));
+    }
+  }
+
+  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+      isa<ConstantSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+                       DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
+                                   N0.getOperand(0), N1),
+                       DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  // reassociate mul
+  SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
+  if (RMUL.getNode() != 0)
+    return RMUL;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (sdiv c1, c2) -> c1/c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+  // fold (sdiv X, 1) -> X
+  if (N1C && N1C->getSExtValue() == 1LL)
+    return N0;
+  // fold (sdiv X, -1) -> 0-X
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       DAG.getConstant(0, VT), N0);
+  // If we know the sign bits of both operands are zero, strength reduce to a
+  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
+  if (!VT.isVector()) {
+    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
+                         N0, N1);
+  }
+  // fold (sdiv X, pow2) -> simple ops after legalize
+  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
+      (isPowerOf2_64(N1C->getSExtValue()) ||
+       isPowerOf2_64(-N1C->getSExtValue()))) {
+    // If dividing by powers of two is cheap, then don't perform the following
+    // fold.
+    if (TLI.isPow2DivCheap())
+      return SDValue();
+
+    int64_t pow2 = N1C->getSExtValue();
+    int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+    unsigned lg2 = Log2_64(abs2);
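+    // Worked example (illustrative) for i32 X sdiv 8, where lg2 == 3:
+    //   SGN = X >> 31            (0 if X >= 0, -1 otherwise)
+    //   SRL = SGN >>u (32 - 3)   (0 or 7, i.e. abs2 - 1)
+    //   ADD = X + SRL            (biases negative X toward zero)
+    //   SRA = ADD >> 3           (quotient, rounded toward zero)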
+
+    // Splat the sign bit into the register
+    SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+                              DAG.getConstant(VT.getSizeInBits()-1,
+                                              getShiftAmountTy()));
+    AddToWorkList(SGN.getNode());
+
+    // Add (N0 < 0) ? abs2 - 1 : 0;
+    SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
+                              DAG.getConstant(VT.getSizeInBits() - lg2,
+                                              getShiftAmountTy()));
+    SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
+    AddToWorkList(SRL.getNode());
+    AddToWorkList(ADD.getNode());
+
+    // Divide by pow2.
+    SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
+                              DAG.getConstant(lg2, getShiftAmountTy()));
+
+    // If we're dividing by a positive value, we're done.  Otherwise, we must
+    // negate the result.
+    if (pow2 > 0)
+      return SRA;
+
+    AddToWorkList(SRA.getNode());
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       DAG.getConstant(0, VT), SRA);
+  }
+
+  // if integer divide is expensive and we satisfy the requirements, emit an
+  // alternate sequence.
+  if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
+      !TLI.isIntDivCheap()) {
+    SDValue Op = BuildSDIV(N);
+    if (Op.getNode()) return Op;
+  }
+
+  // undef / X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X / undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (udiv c1, c2) -> c1/c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+  // fold (udiv x, (1 << c)) -> x >>u c
+  if (N1C && N1C->getAPIntValue().isPowerOf2())
+    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
+                                       getShiftAmountTy()));
+  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
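+  // e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2))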
+  if (N1.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+      if (SHC->getAPIntValue().isPowerOf2()) {
+        EVT ADDVT = N1.getOperand(1).getValueType();
+        SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
+                                  N1.getOperand(1),
+                                  DAG.getConstant(SHC->getAPIntValue()
+                                                                  .logBase2(),
+                                                  ADDVT));
+        AddToWorkList(Add.getNode());
+        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
+      }
+    }
+  }
+  // fold (udiv x, c) -> alternate
+  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+    SDValue Op = BuildUDIV(N);
+    if (Op.getNode()) return Op;
+  }
+
+  // undef / X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X / undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSREM(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (srem c1, c2) -> c1%c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+  // If we know the sign bits of both operands are zero, strength reduce to a
+  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+  if (!VT.isVector()) {
+    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
+  }
+
+  // If X/C can be simplified by the division-by-constant logic, lower
+  // X%C to the equivalent of X-X/C*C.
+  if (N1C && !N1C->isNullValue()) {
+    SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
+    AddToWorkList(Div.getNode());
+    SDValue OptimizedDiv = combine(Div.getNode());
+    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+                                OptimizedDiv, N1);
+      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+      AddToWorkList(Mul.getNode());
+      return Sub;
+    }
+  }
+
+  // undef % X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X % undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUREM(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (urem c1, c2) -> c1%c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+  // fold (urem x, pow2) -> (and x, pow2-1)
+  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
+                       DAG.getConstant(N1C->getAPIntValue() - 1, VT));
+  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+  if (N1.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+      if (SHC->getAPIntValue().isPowerOf2()) {
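+        // (shl pow2, y) has exactly one bit set, so adding all-ones (-1)
+        // yields the mask of the bits below it.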
+        SDValue Add =
+          DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
+                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+                                 VT));
+        AddToWorkList(Add.getNode());
+        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
+      }
+    }
+  }
+
+  // If X/C can be simplified by the division-by-constant logic, lower
+  // X%C to the equivalent of X-X/C*C.
+  if (N1C && !N1C->isNullValue()) {
+    SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+    AddToWorkList(Div.getNode());
+    SDValue OptimizedDiv = combine(Div.getNode());
+    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+                                OptimizedDiv, N1);
+      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+      AddToWorkList(Mul.getNode());
+      return Sub;
+    }
+  }
+
+  // undef % X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X % undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (mulhs x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhs x, 1) -> (sra x, size(x)-1)
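+  // (The high half of x*1 is the sign extension of x, i.e. the sign bit
+  // replicated, which is exactly what sra by size(x)-1 produces.)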
+  if (N1C && N1C->getAPIntValue() == 1)
+    return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+                                       getShiftAmountTy()));
+  // fold (mulhs x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (mulhu x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhu x, 1) -> 0
+  if (N1C && N1C->getAPIntValue() == 1)
+    return DAG.getConstant(0, N0.getValueType());
+  // fold (mulhu x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  return SDValue();
+}
+
+/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+/// compute two values. LoOp and HiOp give the opcodes for the two computations
+/// that are being performed. Return the simplified value if a simplification
+/// was made, or a null SDValue otherwise.
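+/// For example, if only the low result of an (smul_lohi x, y) is used, the
+/// node can be replaced by a plain (mul x, y).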
+///
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+                                                unsigned HiOp) {
+  // If the high half is not needed, just compute the low half.
+  bool HiExists = N->hasAnyUseOfValue(1);
+  if (!HiExists &&
+      (!LegalOperations ||
+       TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+    SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+                              N->op_begin(), N->getNumOperands());
+    return CombineTo(N, Res, Res);
+  }
+
+  // If the low half is not needed, just compute the high half.
+  bool LoExists = N->hasAnyUseOfValue(0);
+  if (!LoExists &&
+      (!LegalOperations ||
+       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+    SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+                              N->op_begin(), N->getNumOperands());
+    return CombineTo(N, Res, Res);
+  }
+
+  // If both halves are used, leave the node as it is.
+  if (LoExists && HiExists)
+    return SDValue();
+
+  // If the two computed results can be simplified separately, separate them.
+  if (LoExists) {
+    SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+                             N->op_begin(), N->getNumOperands());
+    AddToWorkList(Lo.getNode());
+    SDValue LoOpt = combine(Lo.getNode());
+    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+      return CombineTo(N, LoOpt, LoOpt);
+  }
+
+  if (HiExists) {
+    SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+                             N->op_begin(), N->getNumOperands());
+    AddToWorkList(Hi.getNode());
+    SDValue HiOpt = combine(Hi.getNode());
+    if (HiOpt.getNode() && HiOpt != Hi &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+      return CombineTo(N, HiOpt, HiOpt);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
+  if (Res.getNode()) return Res;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
+  if (Res.getNode()) return Res;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
+  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
+  if (Res.getNode()) return Res;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
+  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
+  if (Res.getNode()) return Res;
+
+  return SDValue();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+  // Bail early if none of these transforms apply.
+  if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
+  // For each of OP in AND/OR/XOR:
+  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
+  //
+  // do not sink logical op inside of a vector extend, since it may combine
+  // into a vsetcc.
+  EVT Op0VT = N0.getOperand(0).getValueType();
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+       N0.getOpcode() == ISD::ANY_EXTEND  ||
+       N0.getOpcode() == ISD::SIGN_EXTEND ||
+       (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
+      !VT.isVector() &&
+      Op0VT == N1.getOperand(0).getValueType() &&
+      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
+    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+                                 N0.getOperand(0).getValueType(),
+                                 N0.getOperand(0), N1.getOperand(0));
+    AddToWorkList(ORNode.getNode());
+    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
+  }
+
+  // For each of OP in SHL/SRL/SRA/AND...
+  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
+  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+      N0.getOperand(1) == N1.getOperand(1)) {
+    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+                                 N0.getOperand(0).getValueType(),
+                                 N0.getOperand(0), N1.getOperand(0));
+    AddToWorkList(ORNode.getNode());
+    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                       ORNode, N0.getOperand(1));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue LL, LR, RL, RR, CC0, CC1;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N1.getValueType();
+  unsigned BitWidth = VT.getSizeInBits();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (and x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (and c1, c2) -> c1&c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+  // fold (and x, -1) -> x
+  if (N1C && N1C->isAllOnesValue())
+    return N0;
+  // if (and x, c) is known to be zero, return 0
+  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+                                   APInt::getAllOnesValue(BitWidth)))
+    return DAG.getConstant(0, VT);
+  // reassociate and
+  SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+  if (RAND.getNode() != 0)
+    return RAND;
+  // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+  if (N1C && N0.getOpcode() == ISD::OR)
+    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+        return N1;
+  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
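+  // e.g. (and (any_extend i8 V to i32), 255) -> (zero_extend V), since the
+  // mask keeps only bits that come from V itself.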
+  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+    SDValue N0Op0 = N0.getOperand(0);
+    APInt Mask = ~N1C->getAPIntValue();
+    Mask.trunc(N0Op0.getValueSizeInBits());
+    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+                                 N0.getValueType(), N0Op0);
+
+      // Replace uses of the AND with uses of the Zero extend node.
+      CombineTo(N, Zext);
+
+      // We actually want to replace all uses of the any_extend with the
+      // zero_extend, to avoid duplicating things.  This will later cause this
+      // AND to be folded.
+      CombineTo(N0.getNode(), Zext);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+        LL.getValueType().isInteger()) {
+      // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+      if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+        SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+                                     LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+      }
+      // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+        SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+                                      LR.getValueType(), LL, RL);
+        AddToWorkList(ANDNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+      }
+      // fold (and (setgt X,  -1), (setgt Y,  -1)) -> (setgt (or X, Y), -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+        SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+                                     LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+      }
+    }
+    // canonicalize equivalent to ll == rl
+    if (LL == RR && LR == RL) {
+      Op1 = ISD::getSetCCSwappedOperands(Op1);
+      std::swap(RL, RR);
+    }
+    if (LL == RL && LR == RR) {
+      bool isInteger = LL.getValueType().isInteger();
+      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+      if (Result != ISD::SETCC_INVALID &&
+          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+                            LL, LR, Result);
+    }
+  }
+
+  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.getNode()) return Tmp;
+  }
+
+  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+  // fold (and (sra)) -> (and (srl)) when possible.
+  if (!VT.isVector() &&
+      SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (zext_inreg (extload x)) -> (zextload x)
+  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    EVT MemVT = LN0->getMemoryVT();
+    // If we zero all the possible extended bits, then we can turn this into
+    // a zextload if we are running before legalize or the operation is legal.
+    unsigned BitWidth = N1.getValueSizeInBits();
+    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+                                     BitWidth - MemVT.getSizeInBits())) &&
+        ((!LegalOperations && !LN0->isVolatile()) ||
+         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+                                       LN0->getChain(), LN0->getBasePtr(),
+                                       LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->isVolatile(), LN0->getAlignment());
+      AddToWorkList(N);
+      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    EVT MemVT = LN0->getMemoryVT();
+    // If we zero all the possible extended bits, then we can turn this into
+    // a zextload if we are running before legalize or the operation is legal.
+    unsigned BitWidth = N1.getValueSizeInBits();
+    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+                                     BitWidth - MemVT.getSizeInBits())) &&
+        ((!LegalOperations && !LN0->isVolatile()) ||
+         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->isVolatile(), LN0->getAlignment());
+      AddToWorkList(N);
+      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // fold (and (load x), 255) -> (zextload x, i8)
+  // fold (and (extload x, i16), 255) -> (zextload x, i8)
+  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+  if (N1C && (N0.getOpcode() == ISD::LOAD ||
+              (N0.getOpcode() == ISD::ANY_EXTEND &&
+               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+    LoadSDNode *LN0 = HasAnyExt
+      ? cast<LoadSDNode>(N0.getOperand(0))
+      : cast<LoadSDNode>(N0);
+    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+        LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
+      uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
+      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+        EVT LoadedVT = LN0->getMemoryVT();
+
+        if (ExtVT == LoadedVT &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+          SDValue NewLoad =
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), LN0->getBasePtr(),
+                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           ExtVT, LN0->isVolatile(), LN0->getAlignment());
+          AddToWorkList(N);
+          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
+
+        // Do not change the width of a volatile load.
+        // Do not generate loads of non-round integer types since these can
+        // be expensive (and would be wrong if the type is not byte sized).
+        if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT PtrType = LN0->getOperand(1).getValueType();
+
+          unsigned Alignment = LN0->getAlignment();
+          SDValue NewPtr = LN0->getBasePtr();
+
+          // For big endian targets, we need to add an offset to the pointer
+          // to load the correct bytes.  For little endian systems, we merely
+          // need to read fewer bytes from the same pointer.
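+          // e.g. narrowing an i32 load to an i8 zextload on a big-endian
+          // target must load from BasePtr + 3.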
+          if (TLI.isBigEndian()) {
+            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+            unsigned EVTStoreBytes = ExtVT.getStoreSize();
+            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+            NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+                                 NewPtr, DAG.getConstant(PtrOff, PtrType));
+            Alignment = MinAlign(Alignment, PtrOff);
+          }
+
+          AddToWorkList(NewPtr.getNode());
+
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+          SDValue Load =
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), NewPtr,
+                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           ExtVT, LN0->isVolatile(), Alignment);
+          AddToWorkList(N);
+          CombineTo(LN0, Load, Load.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue LL, LR, RL, RR, CC0, CC1;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N1.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (or x, undef) -> -1
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+  }
+  // fold (or c1, c2) -> c1|c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+  // fold (or x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (or x, -1) -> -1
+  if (N1C && N1C->isAllOnesValue())
+    return N1;
+  // fold (or x, c) -> c iff (x & ~c) == 0
+  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+    return N1;
+  // reassociate or
+  SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+  if (ROR.getNode() != 0)
+    return ROR;
+  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+             isa<ConstantSDNode>(N0.getOperand(1))) {
+    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                       DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+                                   N0.getOperand(0), N1),
+                       DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+  }
+  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+        LL.getValueType().isInteger()) {
+      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+      if (cast<ConstantSDNode>(LR)->isNullValue() &&
+          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+        SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+                                     LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+      }
+      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+      // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+        SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+                                      LR.getValueType(), LL, RL);
+        AddToWorkList(ANDNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+      }
+    }
+    // canonicalize equivalent to ll == rl
+    if (LL == RR && LR == RL) {
+      Op1 = ISD::getSetCCSwappedOperands(Op1);
+      std::swap(RL, RR);
+    }
+    if (LL == RL && LR == RR) {
+      bool isInteger = LL.getValueType().isInteger();
+      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+      if (Result != ISD::SETCC_INVALID &&
+          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+                            LL, LR, Result);
+    }
+  }
+
+  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.getNode()) return Tmp;
+  }
+
+  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
+  if (N0.getOpcode() == ISD::AND &&
+      N1.getOpcode() == ISD::AND &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      N1.getOperand(1).getOpcode() == ISD::Constant &&
+      // Don't increase # computations.
+      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+    // We can only do this xform if we know that bits from X that are set in C2
+    // but not in C1 are already zero.  Likewise for Y.
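+    // e.g. (or (and X, 0xFF00), (and Y, 0x00FF)) -> (and (or X, Y), 0xFFFF)
+    // when X's low byte and Y's high byte are known to be zero.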
+    const APInt &LHSMask =
+      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+    const APInt &RHSMask =
+      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+      SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+                              N0.getOperand(0), N1.getOperand(0));
+      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+                         DAG.getConstant(LHSMask | RHSMask, VT));
+    }
+  }
+
+  // See if this is some rotate idiom.
+  if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+    return SDValue(Rot, 0);
+
+  return SDValue();
+}
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+  if (Op.getOpcode() == ISD::AND) {
+    if (isa<ConstantSDNode>(Op.getOperand(1))) {
+      Mask = Op.getOperand(1);
+      Op = Op.getOperand(0);
+    } else {
+      return false;
+    }
+  }
+
+  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+    Shift = Op;
+    return true;
+  }
+
+  return false;
+}
+
+// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+  // Must be a legal type; expanded or promoted types won't work with rotates.
+  EVT VT = LHS.getValueType();
+  if (!TLI.isTypeLegal(VT)) return 0;
+
+  // The target must have at least one rotate flavor.
+  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+  if (!HasROTL && !HasROTR) return 0;
+
+  // Match "(X shl/srl V1) & V2" where V2 may not be present.
+  SDValue LHSShift;   // The shift.
+  SDValue LHSMask;    // AND value if any.
+  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+    return 0; // Not part of a rotate.
+
+  SDValue RHSShift;   // The shift.
+  SDValue RHSMask;    // AND value if any.
+  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+    return 0; // Not part of a rotate.
+
+  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+    return 0;   // Not shifting the same value.
+
+  if (LHSShift.getOpcode() == RHSShift.getOpcode())
+    return 0;   // Shifts must disagree.
+
+  // Canonicalize shl to left side in a shl/srl pair.
+  if (RHSShift.getOpcode() == ISD::SHL) {
+    std::swap(LHS, RHS);
+    std::swap(LHSShift, RHSShift);
+    std::swap(LHSMask, RHSMask);
+  }
+
+  unsigned OpSizeInBits = VT.getSizeInBits();
+  SDValue LHSShiftArg = LHSShift.getOperand(0);
+  SDValue LHSShiftAmt = LHSShift.getOperand(1);
+  SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
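+  // e.g. for i32, (or (shl x, 8), (srl x, 24)) has LShVal + RShVal == 32 and
+  // becomes (rotl x, 8), or equivalently (rotr x, 24).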
+  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+      RHSShiftAmt.getOpcode() == ISD::Constant) {
+    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
+    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
+    if ((LShVal + RShVal) != OpSizeInBits)
+      return 0;
+
+    SDValue Rot;
+    if (HasROTL)
+      Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
+    else
+      Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+
+    // If there is an AND of either shifted operand, apply it to the result.
+    if (LHSMask.getNode() || RHSMask.getNode()) {
+      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+
+      if (LHSMask.getNode()) {
+        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
+        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+      }
+      if (RHSMask.getNode()) {
+        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
+        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+      }
+
+      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+    }
+
+    return Rot.getNode();
+  }
+
+  // If there is a mask here, and we have a variable shift, we can't be sure
+  // that we're masking out the correct bits.
+  if (LHSMask.getNode() || RHSMask.getNode())
+    return 0;
+
+  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+    if (ConstantSDNode *SUBC =
+          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+      if (SUBC->getAPIntValue() == OpSizeInBits) {
+        if (HasROTL)
+          return DAG.getNode(ISD::ROTL, DL, VT,
+                             LHSShiftArg, LHSShiftAmt).getNode();
+        else
+          return DAG.getNode(ISD::ROTR, DL, VT,
+                             LHSShiftArg, RHSShiftAmt).getNode();
+      }
+    }
+  }
+
+  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
+  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
+  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+    if (ConstantSDNode *SUBC =
+          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+      if (SUBC->getAPIntValue() == OpSizeInBits) {
+        if (HasROTR)
+          return DAG.getNode(ISD::ROTR, DL, VT,
+                             LHSShiftArg, RHSShiftAmt).getNode();
+        else
+          return DAG.getNode(ISD::ROTL, DL, VT,
+                             LHSShiftArg, LHSShiftAmt).getNode();
+      }
+    }
+  }
+
+  // Look for sign-, zero-, or any-extended or truncated shift amounts:
+  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+    if (RExtOp0.getOpcode() == ISD::SUB &&
+        RExtOp0.getOperand(1) == LExtOp0) {
+      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+      //   (rotl x, y)
+      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+      //   (rotr x, (sub 32, y))
+      if (ConstantSDNode *SUBC =
+            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+        if (SUBC->getAPIntValue() == OpSizeInBits) {
+          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+                             LHSShiftArg,
+                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+        }
+      }
+    } else if (LExtOp0.getOpcode() == ISD::SUB &&
+               RExtOp0 == LExtOp0.getOperand(1)) {
+      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+      //   (rotr x, y)
+      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+      //   (rotl x, (sub 32, y))
+      if (ConstantSDNode *SUBC =
+            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+        if (SUBC->getAPIntValue() == OpSizeInBits) {
+          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+                             LHSShiftArg,
+                             HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+        }
+      }
+    }
+  }
+
+  return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue LHS, RHS, CC;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (xor x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (xor c1, c2) -> c1^c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+  // fold (xor x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // reassociate xor
+  SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+  if (RXOR.getNode() != 0)
+    return RXOR;
+
+  // fold !(x cc y) -> (x !cc y)
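+  // e.g. (xor (setcc x, y, setlt), 1) -> (setcc x, y, setge)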
+  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+    bool isInt = LHS.getValueType().isInteger();
+    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+                                               isInt);
+
+    if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+      switch (N0.getOpcode()) {
+      default:
+        llvm_unreachable("Unhandled SetCC Equivalent!");
+      case ISD::SETCC:
+        return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+      case ISD::SELECT_CC:
+        return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+                               N0.getOperand(3), NotCC);
+      }
+    }
+  }
+
+  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+      N0.getNode()->hasOneUse() &&
+      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+    SDValue V = N0.getOperand(0);
+    V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+                    DAG.getConstant(1, V.getValueType()));
+    AddToWorkList(V.getNode());
+    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+  }
+
+  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+    }
+  }
+  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+  if (N1C && N1C->isAllOnesValue() &&
+      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+    }
+  }
+  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+  if (N1C && N0.getOpcode() == ISD::XOR) {
+    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N00C)
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+                         DAG.getConstant(N1C->getAPIntValue() ^
+                                         N00C->getAPIntValue(), VT));
+    if (N01C)
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+                         DAG.getConstant(N1C->getAPIntValue() ^
+                                         N01C->getAPIntValue(), VT));
+  }
+  // fold (xor x, x) -> 0
+  if (N0 == N1) {
+    if (!VT.isVector()) {
+      return DAG.getConstant(0, VT);
+    } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
+      // Produce a vector of zeros.
+      SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+      std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+                         &Ops[0], Ops.size());
+    }
+  }
+
+  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.getNode()) return Tmp;
+  }
+
+  // Simplify the expression using non-local knowledge.
+  if (!VT.isVector() &&
+      SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+  SDNode *LHS = N->getOperand(0).getNode();
+  if (!LHS->hasOneUse()) return SDValue();
+
+  // We want to pull some binops through shifts, so that we have (and (shift))
+  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
+  // thing happens with address calculations, so it's important to canonicalize
+  // it.
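+  // e.g. (shl (or x, 0xF0), 4) -> (or (shl x, 4), 0xF00), provided the
+  // restrictions below are met.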
+  bool HighBitSet = false;  // Can we transform this if the high bit is set?
+
+  switch (LHS->getOpcode()) {
+  default: return SDValue();
+  case ISD::OR:
+  case ISD::XOR:
+    HighBitSet = false; // We can only transform sra if the high bit is clear.
+    break;
+  case ISD::AND:
+    HighBitSet = true;  // We can only transform sra if the high bit is set.
+    break;
+  case ISD::ADD:
+    if (N->getOpcode() != ISD::SHL)
+      return SDValue(); // only shl(add) not sr[al](add).
+    HighBitSet = false; // We can only transform sra if the high bit is clear.
+    break;
+  }
+
+  // We require the RHS of the binop to be a constant as well.
+  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+  if (!BinOpCst) return SDValue();
+
+  // FIXME: disable this unless the input to the binop is a shift by a constant.
+  // If it is not a shift, it pessimizes some common cases like:
+  //
+  //    void foo(int *X, int i) { X[i & 1235] = 1; }
+  //    int bar(int *X, int i) { return X[i & 255]; }
+  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+       BinOpLHSVal->getOpcode() != ISD::SRA &&
+       BinOpLHSVal->getOpcode() != ISD::SRL) ||
+      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+
+  // If this is a signed shift right, and the high bit is modified by the
+  // logical operation, do not perform the transformation. The HighBitSet
+  // boolean indicates the value of the high bit of the constant which would
+  // cause it to be modified for this operation.
+  if (N->getOpcode() == ISD::SRA) {
+    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+    if (BinOpRHSSignSet != HighBitSet)
+      return SDValue();
+  }
+
+  // Fold the constants, shifting the binop RHS by the shift amount.
+  SDValue NewRHS = DAG.getNode(N->getOpcode(),
+                               LHS->getOperand(1).getDebugLoc(),
+                               VT, LHS->getOperand(1), N->getOperand(1));
+
+  // Create the new shift.
+  SDValue NewShift = DAG.getNode(N->getOpcode(),
+                                 LHS->getOperand(0).getDebugLoc(),
+                                 VT, LHS->getOperand(0), N->getOperand(1));
+
+  // Create the new binop.
+  return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+  // fold (shl c1, c2) -> c1<<c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+  // fold (shl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (shl x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+    return DAG.getUNDEF(VT);
+  // fold (shl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // if (shl x, c) is known to be zero, return 0
+  if (DAG.MaskedValueIsZero(SDValue(N, 0),
+                            APInt::getAllOnesValue(OpSizeInBits)))
+    return DAG.getConstant(0, VT);
+  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      EVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC.trunc(TruncVT.getSizeInBits());
+      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SHL &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    if (c1 + c2 >= OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+  // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
+  //                               (srl (and x, (shl -1, c1)), (sub c1, c2))
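+  // e.g. for i32, (shl (srl x, 4), 6) -> (shl (and x, 0xFFFFFFF0), 2) and
+  // (shl (srl x, 6), 4) -> (srl (and x, 0xFFFFFFC0), 2).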
+  if (N1C && N0.getOpcode() == ISD::SRL &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    if (c1 < VT.getSizeInBits()) {
+      uint64_t c2 = N1C->getZExtValue();
+      SDValue HiBitsMask =
+        DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+                                              VT.getSizeInBits() - c1),
+                        VT);
+      SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
+                                 N0.getOperand(0),
+                                 HiBitsMask);
+      if (c2 > c1)
+        return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+                           DAG.getConstant(c2-c1, N1.getValueType()));
+      else
+        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+                           DAG.getConstant(c1-c2, N1.getValueType()));
+    }
+  }
+  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+    SDValue HiBitsMask =
+      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+                                            VT.getSizeInBits() -
+                                              N1C->getZExtValue()),
+                      VT);
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       HiBitsMask);
+  }
+
+  return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+  // fold (sra c1, c2) -> c1>>c2 (arithmetic)
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+  // fold (sra 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (sra -1, x) -> -1
+  if (N0C && N0C->isAllOnesValue())
+    return N0;
+  // fold (sra x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+    return DAG.getUNDEF(VT);
+  // fold (sra x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
+  // sext_inreg.
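+  // e.g. for i32 with c1 == 24, (sra (shl x, 24), 24) sign extends the low
+  // i8 of x in place.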
+  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+    if (VT.isVector())
+      ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               ExtVT, VT.getVectorNumElements());
+    if ((!LegalOperations ||
+         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                         N0.getOperand(0), DAG.getValueType(ExtVT));
+  }
+
+  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SRA) {
+    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
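+      // Two arithmetic shifts compose into a shift by the sum of the
+      // amounts; clamp to size-1 since any larger amount just replicates
+      // the sign bit.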
+      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+                         DAG.getConstant(Sum, N1C->getValueType(0)));
+    }
+  }
+
+  // fold (sra (shl X, m), (sub result_size, n))
+  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
+  // result_size - n != m.
+  // If truncate is free for the target, sext(shl) is likely to result in
+  // better code.
+  if (N0.getOpcode() == ISD::SHL) {
+    // Get the two constants of the shifts: N01C = m, N1C = result_size - n.
+    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N01C && N1C) {
+      // Determine what the truncate's result bitsize and type would be.
+      EVT TruncVT =
+        EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
+      // Determine the residual right-shift amount.
+      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+      // If the shift is not a no-op (in which case this should be just a sign
+      // extend already), the truncated-to type is legal, sign_extend is legal
+      // or custom on that type, and the truncate to that type is both legal
+      // and free, perform the transform.
+      if ((ShiftAmt > 0) &&
+          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+          TLI.isTruncateFree(VT, TruncVT)) {
+        SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
+        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+                                    N0.getOperand(0), Amt);
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+                                    Shift);
+        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+                           N->getValueType(0), Trunc);
+      }
+    }
+  }
+
+  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      EVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(),
+                                     TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  // Simplify, based on bits shifted out of the LHS.
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // If the sign bit is known to be zero, switch this to a SRL.
+  if (DAG.SignBitIsZero(N0))
+    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+  return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+  // fold (srl c1, c2) -> c1 >>u c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+  // fold (srl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (srl x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+    return DAG.getUNDEF(VT);
+  // fold (srl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // if (srl x, c) is known to be zero, return 0
+  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+                                   APInt::getAllOnesValue(OpSizeInBits)))
+    return DAG.getConstant(0, VT);
+
+  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SRL &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    if (c1 + c2 >= OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+
+  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+    // Shifting in all undef bits?
+    EVT SmallVT = N0.getOperand(0).getValueType();
+    if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+      return DAG.getUNDEF(VT);
+
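+    // Otherwise, perform the shift in the narrow type and any-extend the
+    // result; the bits above SmallVT were undef to begin with.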
+    SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+                                     N0.getOperand(0), N1);
+    AddToWorkList(SmallShift.getNode());
+    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+  }
+
+  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
+  // bit, which is unmodified by sra.
+  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+    if (N0.getOpcode() == ISD::SRA)
+      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+  }
+
+  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit),
+  // where "5" stands for log2 of the bit width of x.
+  if (N1C && N0.getOpcode() == ISD::CTLZ &&
+      N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+    APInt KnownZero, KnownOne;
+    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+    // If any of the input bits are KnownOne, then the input couldn't be all
+    // zeros, thus the result of the srl will always be zero.
+    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+    // If all of the bits input to the ctlz node are known to be zero, then
+    // the result of the ctlz is "32" and the result of the shift is one.
+    APInt UnknownBits = ~KnownZero & Mask;
+    if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+    // Otherwise, check to see if there is exactly one bit input to the ctlz.
+    if ((UnknownBits & (UnknownBits - 1)) == 0) {
+      // Okay, we know that only the single bit specified by UnknownBits
+      // could be set on input to the CTLZ node. If this bit is set, the SRL
+      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+      // to an SRL/XOR pair, which is likely to simplify more.
+      unsigned ShAmt = UnknownBits.countTrailingZeros();
+      SDValue Op = N0.getOperand(0);
+
+      if (ShAmt) {
+        Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+                         DAG.getConstant(ShAmt, getShiftAmountTy()));
+        AddToWorkList(Op.getNode());
+      }
+
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+                         Op, DAG.getConstant(1, VT));
+    }
+  }
+
+  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      EVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC.trunc(TruncVT.getSizeInBits());
+      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(),
+                                     TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  // fold operands of srl based on knowledge that the low bits are not
+  // demanded.
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  if (N1C) {
+    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+    if (NewSRL.getNode())
+      return NewSRL;
+  }
+
+  // Here is a common situation. We want to optimize:
+  //
+  //   %a = ...
+  //   %b = and i32 %a, 2
+  //   %c = srl i32 %b, 1
+  //   brcond i32 %c ...
+  //
+  // into
+  // 
+  //   %a = ...
+  //   %b = and %a, 2
+  //   %c = setcc eq %b, 0
+  //   brcond %c ...
+  //
+  // However, when the source operand of the SRL is optimized into an AND, the
+  // SRL itself may not be optimized further. Look for it and add the BRCOND
+  // into the worklist.
+  if (N->hasOneUse()) {
+    SDNode *Use = *N->use_begin();
+    if (Use->getOpcode() == ISD::BRCOND)
+      AddToWorkList(Use);
+    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+      // Also look past the truncate.
+      Use = *Use->use_begin();
+      if (Use->getOpcode() == ISD::BRCOND)
+        AddToWorkList(Use);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // fold (ctlz c1) -> c2
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // fold (cttz c1) -> c2
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // fold (ctpop c1) -> c2
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+  EVT VT = N->getValueType(0);
+  EVT VT0 = N0.getValueType();
+
+  // fold (select C, X, X) -> X
+  if (N1 == N2)
+    return N1;
+  // fold (select true, X, Y) -> X
+  if (N0C && !N0C->isNullValue())
+    return N1;
+  // fold (select false, X, Y) -> Y
+  if (N0C && N0C->isNullValue())
+    return N2;
+  // fold (select C, 1, X) -> (or C, X)
+  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+  // fold (select C, 0, 1) -> (xor C, 1)
+  if (VT.isInteger() &&
+      (VT0 == MVT::i1 ||
+       (VT0.isInteger() &&
+        TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
+      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+    SDValue XORNode;
+    if (VT == VT0)
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+                         N0, DAG.getConstant(1, VT0));
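+    // The select result type differs from VT0; build the xor in VT0, then
+    // zero extend or truncate it to VT below.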
+    XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+                          N0, DAG.getConstant(1, VT0));
+    AddToWorkList(XORNode.getNode());
+    if (VT.bitsGT(VT0))
+      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+  }
+  // fold (select C, 0, X) -> (and (not C), X)
+  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+    AddToWorkList(NOTNode.getNode());
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+  }
+  // fold (select C, X, 1) -> (or (not C), X)
+  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+    AddToWorkList(NOTNode.getNode());
+    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+  }
+  // fold (select C, X, 0) -> (and C, X)
+  if (VT == MVT::i1 && N2C && N2C->isNullValue())
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+  // fold (select X, X, Y) -> (or X, Y)
+  // fold (select X, 1, Y) -> (or X, Y)
+  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+  // fold (select X, Y, X) -> (and X, Y)
+  // fold (select X, Y, 0) -> (and X, Y)
+  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N1, N2))
+    return SDValue(N, 0);  // Don't revisit N.
+
+  // fold selects based on a setcc into other things, such as min/max/abs
+  if (N0.getOpcode() == ISD::SETCC) {
+    // FIXME:
+    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+    // having to say they don't support SELECT_CC on every type the DAG knows
+    // about, since there is no way to mark an opcode illegal at all value types
+    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+                         N0.getOperand(0), N0.getOperand(1),
+                         N1, N2, N0.getOperand(2));
+    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  SDValue N3 = N->getOperand(3);
+  SDValue N4 = N->getOperand(4);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+  // fold select_cc lhs, rhs, x, x, cc -> x
+  if (N2 == N3)
+    return N2;
+
+  // Determine if the condition we're dealing with is constant
+  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+                              N0, N1, CC, N->getDebugLoc(), false);
+  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+    if (!SCCC->isNullValue())
+      return N2;    // cond always true -> true val
+    else
+      return N3;    // cond always false -> false val
+  }
+
+  // Fold to a simpler select_cc
+  if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+                       SCC.getOperand(2));
+
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N2, N3))
+    return SDValue(N, 0);  // Don't revisit N.
+
+  // fold select_cc into other things, such as min/max/abs
+  return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
+                       N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if extensions are possible and the
+// above-mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+                                    unsigned ExtOpc,
+                                    SmallVector<SDNode*, 4> &ExtendNodes,
+                                    const TargetLowering &TLI) {
+  bool HasCopyToRegUses = false;
+  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+                            UE = N0.getNode()->use_end();
+       UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User == N)
+      continue;
+    if (UI.getUse().getResNo() != N0.getResNo())
+      continue;
+    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+        // Sign bits will be lost after a zext.
+        return false;
+      bool Add = false;
+      for (unsigned i = 0; i != 2; ++i) {
+        SDValue UseOp = User->getOperand(i);
+        if (UseOp == N0)
+          continue;
+        if (!isa<ConstantSDNode>(UseOp))
+          return false;
+        Add = true;
+      }
+      if (Add)
+        ExtendNodes.push_back(User);
+      continue;
+    }
+    // If truncates aren't free and there are users we can't
+    // extend, it isn't worthwhile.
+    if (!isTruncFree)
+      return false;
+    // Remember if this value is live-out.
+    if (User->getOpcode() == ISD::CopyToReg)
+      HasCopyToRegUses = true;
+  }
+
+  if (HasCopyToRegUses) {
+    bool BothLiveOut = false;
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI) {
+      SDUse &Use = UI.getUse();
+      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+        BothLiveOut = true;
+        break;
+      }
+    }
+    if (BothLiveOut)
+      // Both unextended and extended values are live out. There had better be
+      // a good reason for the transformation.
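+      // Only worthwhile if we found setcc uses we can extend, i.e. if
+      // ExtendNodes is non-empty.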
+      return ExtendNodes.size();
+  }
+  return true;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // fold (sext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+  // fold (sext (sext x)) -> (sext x)
+  // fold (sext (aext x)) -> (sext x)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+                       N0.getOperand(0));
+
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    // fold (sext (truncate (load x))) -> (sext (smaller load x))
+    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+    if (NarrowLoad.getNode()) {
+      if (NarrowLoad.getNode() != N0.getNode())
+        CombineTo(N0.getNode(), NarrowLoad);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+
+    // See if the value being truncated is already sign extended.  If so, just
+    // eliminate the trunc/sext pair.
+    SDValue Op = N0.getOperand(0);
+    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
+    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
+    unsigned DestBits = VT.getScalarType().getSizeInBits();
+    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+    if (OpBits == DestBits) {
+      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
+      // bits, it is already sign extended as needed; just return it.
+      if (NumSignBits > DestBits-MidBits)
+        return Op;
+    } else if (OpBits < DestBits) {
+      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
+      // bits, just sext from i32.
+      if (NumSignBits > OpBits-MidBits)
+        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+    } else {
+      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
+      // bits, just truncate to i32.
+      if (NumSignBits > OpBits-MidBits)
+        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+    }
+
+    // fold (sext (truncate x)) -> (sextinreg x).
+    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+                                                 N0.getValueType())) {
+      if (OpBits < DestBits)
+        Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+      else if (OpBits > DestBits)
+        Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+                         DAG.getValueType(N0.getValueType()));
+    }
+  }
+
+  // fold (sext (load x)) -> (sext (truncate (sextload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+    bool DoXform = true;
+    SmallVector<SDNode*, 4> SetCCs;
+    if (!N0.hasOneUse())
+      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+    if (DoXform) {
+      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                                  N0.getValueType(), ExtLoad);
+      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+      // Extend SetCC uses if necessary.
+      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+        SDNode *SetCC = SetCCs[i];
+        SmallVector<SDValue, 4> Ops;
+
+        for (unsigned j = 0; j != 2; ++j) {
+          SDValue SOp = SetCC->getOperand(j);
+          if (SOp == Trunc)
+            Ops.push_back(ExtLoad);
+          else
+            Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
+                                      N->getDebugLoc(), VT, SOp));
+        }
+
+        Ops.push_back(SetCC->getOperand(2));
+        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+                                     SetCC->getValueType(0),
+                                     &Ops[0], Ops.size()));
+      }
+
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    EVT MemVT = LN0->getMemoryVT();
+    if ((!LegalOperations && !LN0->isVolatile()) ||
+        TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
+      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      CombineTo(N0.getNode(),
+                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                            N0.getValueType(), ExtLoad),
+                ExtLoad.getValue(1));
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  if (N0.getOpcode() == ISD::SETCC) {
+    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+    if (VT.isVector() &&
+        // We know that the # elements of the results is the same as the
+        // # elements of the compare (and the # elements of the compare result
+        // for that matter).  Check to see that they are the same size.  If so,
+        // we know that the element size of the sext'd result matches the
+        // element size of the compare operands.
+        VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() &&
+      
+        // Only do this before legalize for now.
+        !LegalOperations) {
+      return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+                           N0.getOperand(1),
+                           cast<CondCodeSDNode>(N0.getOperand(2))->get());
+    }
+    
+    // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+    SDValue NegOne =
+      DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+    SDValue SCC =
+      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+                       NegOne, DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.getNode()) return SCC;
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+      return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+                         DAG.getSetCC(N->getDebugLoc(),
+                                      TLI.getSetCCResultType(VT),
+                                      N0.getOperand(0), N0.getOperand(1),
+                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+                         NegOne, DAG.getConstant(0, VT));
+  }
+
+  // fold (sext x) -> (zext x) if the sign bit is known zero.
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+      DAG.SignBitIsZero(N0))
+    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // fold (zext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+  // fold (zext (zext x)) -> (zext x)
+  // fold (zext (aext x)) -> (zext x)
+  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+                       N0.getOperand(0));
+
+  // fold (zext (truncate (load x))) -> (zext (smaller load x))
+  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+    if (NarrowLoad.getNode()) {
+      if (NarrowLoad.getNode() != N0.getNode())
+        CombineTo(N0.getNode(), NarrowLoad);
+      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+    }
+  }
+
+  // fold (zext (truncate x)) -> (and x, mask)
+  if (N0.getOpcode() == ISD::TRUNCATE &&
+      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
+    SDValue Op = N0.getOperand(0);
+    if (Op.getValueType().bitsLT(VT)) {
+      Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+    } else if (Op.getValueType().bitsGT(VT)) {
+      Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+    }
+    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+                                  N0.getValueType().getScalarType());
+  }
+
+  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+  // if either of the casts is not free.
+  if (N0.getOpcode() == ISD::AND &&
+      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
+    SDValue X = N0.getOperand(0).getOperand(0);
+    if (X.getValueType().bitsLT(VT)) {
+      X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+    } else if (X.getValueType().bitsGT(VT)) {
+      X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+    }
+    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+    Mask.zext(VT.getSizeInBits());
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                       X, DAG.getConstant(Mask, VT));
+  }
+
+  // fold (zext (load x)) -> (zext (truncate (zextload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+    bool DoXform = true;
+    SmallVector<SDNode*, 4> SetCCs;
+    if (!N0.hasOneUse())
+      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+    if (DoXform) {
+      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                                  N0.getValueType(), ExtLoad);
+      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+      // Extend SetCC uses if necessary.
+      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+        SDNode *SetCC = SetCCs[i];
+        SmallVector<SDValue, 4> Ops;
+
+        for (unsigned j = 0; j != 2; ++j) {
+          SDValue SOp = SetCC->getOperand(j);
+          if (SOp == Trunc)
+            Ops.push_back(ExtLoad);
+          else
+            Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
+                                      N->getDebugLoc(), VT, SOp));
+        }
+
+        Ops.push_back(SetCC->getOperand(2));
+        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+                                     SetCC->getValueType(0),
+                                     &Ops[0], Ops.size()));
+      }
+
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    EVT MemVT = LN0->getMemoryVT();
+    if ((!LegalOperations && !LN0->isVolatile()) ||
+        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      CombineTo(N0.getNode(),
+                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+                            ExtLoad),
+                ExtLoad.getValue(1));
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDValue SCC =
+      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.getNode()) return SCC;
+  }
+
+  // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
+  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+      isa<ConstantSDNode>(N0.getOperand(1)) &&
+      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+      N0.hasOneUse()) {
+    if (N0.getOpcode() == ISD::SHL) {
+      // If the original shl may be shifting out bits, do not perform this
+      // transformation.
+      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
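+      // The inner zext guarantees this many zero high bits in the shl's
+      // input operand.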
+      unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
+        N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
+      if (ShAmt > KnownZeroBits)
+        return SDValue();
+    }
+    DebugLoc dl = N->getDebugLoc();
+    return DAG.getNode(N0.getOpcode(), dl, VT,
+                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
+                       DAG.getNode(ISD::ZERO_EXTEND, dl,
+                                   N0.getOperand(1).getValueType(),
+                                   N0.getOperand(1)));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // fold (aext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+  // fold (aext (aext x)) -> (aext x)
+  // fold (aext (zext x)) -> (zext x)
+  // fold (aext (sext x)) -> (sext x)
+  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
+      N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND)
+    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+  // fold (aext (truncate (load x))) -> (aext (smaller load x))
+  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+    if (NarrowLoad.getNode()) {
+      if (NarrowLoad.getNode() != N0.getNode())
+        CombineTo(N0.getNode(), NarrowLoad);
+      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+    }
+  }
+
+  // fold (aext (truncate x))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDValue TruncOp = N0.getOperand(0);
+    if (TruncOp.getValueType() == VT)
+      return TruncOp; // x iff x size == zext size.
+    if (TruncOp.getValueType().bitsGT(VT))
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+  }
+
+  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+  // if the trunc is not free.
+  if (N0.getOpcode() == ISD::AND &&
+      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+                          N0.getValueType())) {
+    SDValue X = N0.getOperand(0).getOperand(0);
+    if (X.getValueType().bitsLT(VT)) {
+      X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+    } else if (X.getValueType().bitsGT(VT)) {
+      X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+    }
+    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+    Mask.zext(VT.getSizeInBits());
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                       X, DAG.getConstant(Mask, VT));
+  }
+
+  // fold (aext (load x)) -> (aext (truncate (extload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    bool DoXform = true;
+    SmallVector<SDNode*, 4> SetCCs;
+    if (!N0.hasOneUse())
+      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+    if (DoXform) {
+      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                                  N0.getValueType(), ExtLoad);
+      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+      // Extend SetCC uses if necessary.
+      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+        SDNode *SetCC = SetCCs[i];
+        SmallVector<SDValue, 4> Ops;
+
+        for (unsigned j = 0; j != 2; ++j) {
+          SDValue SOp = SetCC->getOperand(j);
+          if (SOp == Trunc)
+            Ops.push_back(ExtLoad);
+          else
+            Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
+                                      N->getDebugLoc(), VT, SOp));
+        }
+
+        Ops.push_back(SetCC->getOperand(2));
+        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+                                     SetCC->getValueType(0),
+                                     &Ops[0], Ops.size()));
+      }
+
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
+  if (N0.getOpcode() == ISD::LOAD &&
+      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    EVT MemVT = LN0->getMemoryVT();
+    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+                                     VT, LN0->getChain(), LN0->getBasePtr(),
+                                     LN0->getSrcValue(),
+                                     LN0->getSrcValueOffset(), MemVT,
+                                     LN0->isVolatile(), LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(),
+              DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                          N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDValue SCC =
+      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.getNode())
+      return SCC;
+  }
+
+  return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used.  If so, return the
+/// simpler operand, otherwise return a null SDValue.
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+  switch (V.getOpcode()) {
+  default: break;
+  case ISD::OR:
+  case ISD::XOR:
+    // If the LHS or RHS don't contribute bits to the or/xor, drop them.
+    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+      return V.getOperand(1);
+    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+      return V.getOperand(0);
+    break;
+  case ISD::SRL:
+    // Only look at single-use SRLs.
+    if (!V.getNode()->hasOneUse())
+      break;
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+      // See if we can recursively simplify the LHS.
+      unsigned Amt = RHSC->getZExtValue();
+
+      // Watch out for shift count overflow though.
+      if (Amt >= Mask.getBitWidth()) break;
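+      // The bits demanded from the SRL's input are the demanded result
+      // bits shifted back up by the shift amount.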
+      APInt NewMask = Mask << Amt;
+      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+      if (SimplifyLHS.getNode())
+        return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+                           SimplifyLHS, V.getOperand(1));
+    }
+  }
+  return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N bits
+/// and then truncated to a narrower type, where N is a multiple of the number
+/// of bits of the narrower type, transform it to a narrower load from
+/// address + N / (number of bits of the new type). If the result is to be
+/// extended, also fold the extension to form an extending load.
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+  unsigned Opc = N->getOpcode();
+  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT ExtVT = VT;
+
+  // This transformation isn't valid for vector loads.
+  if (VT.isVector())
+    return SDValue();
+
+  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+  // extended to VT.
+  if (Opc == ISD::SIGN_EXTEND_INREG) {
+    ExtType = ISD::SEXTLOAD;
+    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
+      return SDValue();
+  }
+
+  unsigned EVTBits = ExtVT.getSizeInBits();
+  unsigned ShAmt = 0;
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
+    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of ExtVT?
+      if ((ShAmt & (EVTBits-1)) == 0) {
+        N0 = N0.getOperand(0);
+        // Is the load width a multiple of the size of ExtVT?
+        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+          return SDValue();
+      }
+    }
+  }
+
+  // Do not generate loads of non-round integer types since these can
+  // be expensive (and would be wrong if the type is not byte sized).
+  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
+      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+      // Do not change the width of a volatile load.
+      !cast<LoadSDNode>(N0)->isVolatile()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    EVT PtrType = N0.getOperand(1).getValueType();
+
+    // For big endian targets, we need to adjust the offset to the pointer to
+    // load the correct bytes.
+    if (TLI.isBigEndian()) {
+      unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+      unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+      ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+    }
+
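+    // Convert the shift amount from bits to a byte offset from the base
+    // pointer.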
+    uint64_t PtrOff = ShAmt / 8;
+    unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+    SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+                                 PtrType, LN0->getBasePtr(),
+                                 DAG.getConstant(PtrOff, PtrType));
+    AddToWorkList(NewPtr.getNode());
+
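+    // Emit the narrower load: a plain load if no extension is needed,
+    // otherwise an extending load of the narrower memory type ExtVT.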
+    SDValue Load = (ExtType == ISD::NON_EXTLOAD)
+      ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+                    LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+                    LN0->isVolatile(), NewAlign)
+      : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
+                       LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+                       ExtVT, LN0->isVolatile(), NewAlign);
+
+    // Replace the old load's chain with the new load's chain.
+    WorkListRemover DeadNodes(*this);
+    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+                                  &DeadNodes);
+
+    // Return the new loaded value.
+    return Load;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+  EVT EVT = cast<VTSDNode>(N1)->getVT();
+  unsigned VTBits = VT.getScalarType().getSizeInBits();
+  unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+
+  // fold (sext_in_reg c1) -> c1
+  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+  // If the input is already sign extended, just drop the extension.
+  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+    return N0;
+
+  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                       N0.getOperand(0), N1);
+  }
+
+  // fold (sext_in_reg (sext x)) -> (sext x)
+  // fold (sext_in_reg (aext x)) -> (sext x)
+  // if x is small enough.
+  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+    SDValue N00 = N0.getOperand(0);
+    if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
+      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+  }
+
+  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+    return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+  // fold operands of sext_in_reg based on knowledge that the top bits are not
+  // demanded.
+  if (SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (sext_in_reg (load x)) -> (smaller sextload x)
+  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+  SDValue NarrowLoad = ReduceLoadWidth(N);
+  if (NarrowLoad.getNode())
+    return NarrowLoad;
+
+  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+  if (N0.getOpcode() == ISD::SRL) {
+    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+        // We can turn this into an SRA iff the input to the SRL is already sign
+        // extended enough.
+        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+          return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+                             N0.getOperand(0), N0.getOperand(1));
+      }
+  }
+
+  // fold (sext_inreg (extload x)) -> (sextload x)
+  if (ISD::isEXTLoad(N0.getNode()) &&
+      ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+                                     LN0->getChain(),
+                                     LN0->getBasePtr(), LN0->getSrcValue(),
+                                     LN0->getSrcValueOffset(), EVT,
+                                     LN0->isVolatile(), LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      N0.hasOneUse() &&
+      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+                                     LN0->getChain(),
+                                     LN0->getBasePtr(), LN0->getSrcValue(),
+                                     LN0->getSrcValueOffset(), EVT,
+                                     LN0->isVolatile(), LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // noop truncate
+  if (N0.getValueType() == N->getValueType(0))
+    return N0;
+  // fold (truncate c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+  // fold (truncate (truncate x)) -> (truncate x)
+  if (N0.getOpcode() == ISD::TRUNCATE)
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+      N0.getOpcode() == ISD::ANY_EXTEND) {
+    if (N0.getOperand(0).getValueType().bitsLT(VT))
+      // if the source is smaller than the dest, we still need an extend
+      return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                         N0.getOperand(0));
+    else if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+    else
+      // if the source and dest are the same type, we can drop both the extend
+      // and the truncate.
+      return N0.getOperand(0);
+  }
+
+  // See if we can simplify the input to this truncate through knowledge that
+  // only the low bits are being used.  For example "trunc (or (shl x, 8), y)"
+  // -> trunc y
+  SDValue Shorter =
+    GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+                                             VT.getSizeInBits()));
+  if (Shorter.getNode())
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+
+  // fold (truncate (load x)) -> (smaller load x)
+  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+  return ReduceLoadWidth(N);
+}
+
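+/// getBuildPairElt - Return operand i of the BUILD_PAIR node N, looking
+/// through any MERGE_VALUES node to the underlying operand.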
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+  SDValue Elt = N->getOperand(i);
+  if (Elt.getOpcode() != ISD::MERGE_VALUES)
+    return Elt.getNode();
+  return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+  assert(N->getOpcode() == ISD::BUILD_PAIR && "Not a BUILD_PAIR!");
+
+  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+    return SDValue();
+  EVT LD1VT = LD1->getValueType(0);
+
+  if (ISD::isNON_EXTLoad(LD2) &&
+      LD2->hasOneUse() &&
+      // If both are volatile this would reduce the number of volatile loads.
+      // If one is volatile it might be ok, but be conservative and bail out.
+      !LD1->isVolatile() &&
+      !LD2->isVolatile() &&
+      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+    unsigned Align = LD1->getAlignment();
+    unsigned NewAlign = TLI.getTargetData()->
+      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+    if (NewAlign <= Align &&
+        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+      return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+                         LD1->getBasePtr(), LD1->getSrcValue(),
+                         LD1->getSrcValueOffset(), false, Align);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+  // Only do this before legalize, since afterward the target may be depending
+  // on the bitconvert.
+  // First check to see if this is all constant.
+  if (!LegalTypes &&
+      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+      VT.isVector()) {
+    bool isSimple = true;
+    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+      if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+          N0.getOperand(i).getOpcode() != ISD::Constant &&
+          N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+        isSimple = false;
+        break;
+      }
+
+    EVT DestEltVT = N->getValueType(0).getVectorElementType();
+    assert(!DestEltVT.isVector() &&
+           "Element type of vector ValueType must not be vector!");
+    if (isSimple)
+      return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+  }
+
+  // If the input is a constant, let getNode fold it.
+  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+    SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+    if (Res.getNode() != N) {
+      if (!LegalOperations ||
+          TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+        return Res;
+
+      // Folding it resulted in an illegal node, and it's too late to
+      // do that. Clean up the old node and forego the transformation.
+      // Ideally this won't happen very often, because instcombine
+      // and the earlier dagcombine runs (where illegal nodes are
+      // permitted) should have folded most of them already.
+      DAG.DeleteNode(Res.getNode());
+    }
+  }
+
+  // (conv (conv x, t1), t2) -> (conv x, t2)
+  if (N0.getOpcode() == ISD::BIT_CONVERT)
+    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+                       N0.getOperand(0));
+
+  // fold (conv (load x)) -> (load (conv*)x)
+  // If the resultant load doesn't need a higher alignment than the original!
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+      // Do not change the width of a volatile load.
+      !cast<LoadSDNode>(N0)->isVolatile() &&
+      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    unsigned Align = TLI.getTargetData()->
+      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+    unsigned OrigAlign = LN0->getAlignment();
+
+    if (Align <= OrigAlign) {
+      SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+                                 LN0->getBasePtr(),
+                                 LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                                 LN0->isVolatile(), OrigAlign);
+      AddToWorkList(N);
+      CombineTo(N0.getNode(),
+                DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+                            N0.getValueType(), Load),
+                Load.getValue(1));
+      return Load;
+    }
+  }
+
+  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+  // This often reduces constant pool loads.
+  if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+      N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+    SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+                                  N0.getOperand(0));
+    AddToWorkList(NewConv.getNode());
+
+    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+    if (N0.getOpcode() == ISD::FNEG)
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+                         NewConv, DAG.getConstant(SignBit, VT));
+    assert(N0.getOpcode() == ISD::FABS);
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                       NewConv, DAG.getConstant(~SignBit, VT));
+  }
+
+  // fold (bitconvert (fcopysign cst, x)) ->
+  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
+  // Note that we don't handle (copysign x, cst) because this can always be
+  // folded to an fneg or fabs.
+  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+      VT.isInteger() && !VT.isVector()) {
+    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+    if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
+      SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+                              IntXVT, N0.getOperand(1));
+      AddToWorkList(X.getNode());
+
+      // If X has a different width than the result/lhs, sext it or truncate it.
+      unsigned VTWidth = VT.getSizeInBits();
+      if (OrigXWidth < VTWidth) {
+        X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+        AddToWorkList(X.getNode());
+      } else if (OrigXWidth > VTWidth) {
+        // To get the sign bit in the right place, we have to shift it right
+        // before truncating.
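+        // e.g. OrigXWidth = 64, VTWidth = 32: shifting right by 32 moves
+        // bit 63 down to bit 31, where the truncate below can keep it.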
+        X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+                        X.getValueType(), X,
+                        DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+        AddToWorkList(X.getNode());
+        X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+        AddToWorkList(X.getNode());
+      }
+
+      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+      X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+                      X, DAG.getConstant(SignBit, VT));
+      AddToWorkList(X.getNode());
+
+      SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+                                VT, N0.getOperand(0));
+      Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+                        Cst, DAG.getConstant(~SignBit, VT));
+      AddToWorkList(Cst.getNode());
+
+      return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+    }
+  }
+
+  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+  if (N0.getOpcode() == ISD::BUILD_PAIR) {
+    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+    if (CombineLD.getNode())
+      return CombineLD;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
+/// destination element value type.
+SDValue DAGCombiner::
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+  // If this is already the right type, we're done.
+  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+  unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+  // If this is a conversion of N elements of one type to N elements of another
+  // type, convert each element.  This handles FP<->INT cases.
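+  // e.g. (v4i32 bitconvert (v4f32 bv)) bitconverts each f32 element to an
+  // i32 individually; no bits cross element boundaries.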
+  if (SrcBitSize == DstBitSize) {
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+      SDValue Op = BV->getOperand(i);
+      // If the vector element type is not legal, the BUILD_VECTOR operands
+      // are promoted and implicitly truncated.  Make that explicit here.
+      if (Op.getValueType() != SrcEltVT)
+        Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+                                DstEltVT, Op));
+      AddToWorkList(Ops.back().getNode());
+    }
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+                              BV->getValueType(0).getVectorNumElements());
+    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                       &Ops[0], Ops.size());
+  }
+
+  // Otherwise, we're growing or shrinking the elements.  To avoid having to
+  // handle annoying details of growing/shrinking FP values, we convert them to
+  // int first.
+  if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector whose elements are the
+    // same size.
+    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+    SrcEltVT = IntVT;
+  }
+
+  // Now we know the input is an integer vector.  If the output is a FP type,
+  // convert to integer first, then to FP of the right size.
+  if (DstEltVT.isFloatingPoint()) {
+    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+    // Next, convert to FP elements of the same size.
+    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+  }
+
+  // Okay, we know the src/dst types are both integers of differing sizes.
+  // Handle growing first.
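+  // e.g. v8i16 -> v4i32: NumInputsPerOutput = 2, and on a little-endian
+  // target the higher-indexed i16 supplies the high half of each i32.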
+  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+  if (SrcBitSize < DstBitSize) {
+    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+         i += NumInputsPerOutput) {
+      bool isLE = TLI.isLittleEndian();
+      APInt NewBits = APInt(DstBitSize, 0);
+      bool EltIsUndef = true;
+      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+        // Shift the previously computed bits over.
+        NewBits <<= SrcBitSize;
+        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+        if (Op.getOpcode() == ISD::UNDEF) continue;
+        EltIsUndef = false;
+
+        NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+                    zextOrTrunc(SrcBitSize).zext(DstBitSize));
+      }
+
+      if (EltIsUndef)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      else
+        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+    }
+
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                       &Ops[0], Ops.size());
+  }
+
+  // Finally, this must be the case where we are shrinking elements: each input
+  // turns into multiple outputs.
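+  // e.g. v2i64 -> v4i32: NumOutputsPerInput = 2; each i64 constant is split
+  // into its low and high i32 halves (order reversed on big-endian).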
+  bool isS2V = ISD::isScalarToVector(BV);
+  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+                            NumOutputsPerInput*BV->getNumOperands());
+  SmallVector<SDValue, 8> Ops;
+
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      continue;
+    }
+
+    APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
+                        getAPIntValue()).zextOrTrunc(SrcBitSize);
+
+    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+      APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+      if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+                           Ops[0]);
+      OpVal = OpVal.lshr(DstBitSize);
+    }
+
+    // For big endian targets, swap the order of the pieces of each element.
+    if (TLI.isBigEndian())
+      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                     &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fadd c1, c2) -> c1+c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (fadd A, 0) -> A
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fadd A, (fneg B)) -> (fsub A, B)
+  if (isNegatibleForFree(N1, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+  // fold (fadd (fneg A), B) -> (fsub B, A)
+  if (isNegatibleForFree(N0, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+                       GetNegatedExpression(N0, DAG, LegalOperations));
+
+  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fsub c1, c2) -> c1-c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+  // fold (fsub A, 0) -> A
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fsub 0, B) -> -B
+  if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
+    if (isNegatibleForFree(N1, LegalOperations))
+      return GetNegatedExpression(N1, DAG, LegalOperations);
+    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+  }
+  // fold (fsub A, (fneg B)) -> (fadd A, B)
+  if (isNegatibleForFree(N1, LegalOperations))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fmul c1, c2) -> c1*c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+  // fold (fmul A, 0) -> 0
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N1;
+  // fold (fmul A, 0) -> 0, vector edition.
+  if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+    return N1;
+  // fold (fmul X, 2.0) -> (fadd X, X)
+  if (N1CFP && N1CFP->isExactlyValue(+2.0))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+  // fold (fmul X, -1.0) -> (fneg X)
+  if (N1CFP && N1CFP->isExactlyValue(-1.0))
+    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           GetNegatedExpression(N0, DAG, LegalOperations),
+                           GetNegatedExpression(N1, DAG, LegalOperations));
+    }
+  }
+
+  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fdiv c1, c2) -> c1/c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+                           GetNegatedExpression(N0, DAG, LegalOperations),
+                           GetNegatedExpression(N1, DAG, LegalOperations));
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (frem c1, c2) -> fmod(c1,c2)
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)  // Constant fold
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+  if (N1CFP) {
+    const APFloat& V = N1CFP->getValueAPF();
+    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
+    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+    if (!V.isNegative()) {
+      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+        return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+    } else {
+      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+        return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+                           DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+    }
+  }
+
+  // copysign(fabs(x), y) -> copysign(x, y)
+  // copysign(fneg(x), y) -> copysign(x, y)
+  // copysign(copysign(x,z), y) -> copysign(x, y)
+  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+      N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0.getOperand(0), N1);
+
+  // copysign(x, abs(y)) -> abs(x)
+  if (N1.getOpcode() == ISD::FABS)
+    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+  // copysign(x, copysign(y,z)) -> copysign(x, z)
+  if (N1.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0, N1.getOperand(1));
+
+  // copysign(x, fp_extend(y)) -> copysign(x, y)
+  // copysign(x, fp_round(y)) -> copysign(x, y)
+  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0, N1.getOperand(0));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
+
+  // fold (sint_to_fp c1) -> c1fp
+  if (N0C && OpVT != MVT::ppcf128)
+    return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+  // but UINT_TO_FP is legal on this target, try to convert.
+  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
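+    // e.g. if N0 is (zext i8 x to i32) its sign bit is known zero, and the
+    // signed and unsigned conversions yield the same value.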
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
+
+  // fold (uint_to_fp c1) -> c1fp
+  if (N0C && OpVT != MVT::ppcf128)
+    return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+  // but SINT_TO_FP is legal on this target, try to convert.
+  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_to_sint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_to_uint c1fp) -> c1
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_round c1fp) -> c1fp
+  if (N0CFP && N0.getValueType() != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+  // fold (fp_round (fp_extend x)) -> x
+  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+    return N0.getOperand(0);
+
+  // fold (fp_round (fp_round x)) -> (fp_round x)
+  if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value-preserving truncation if both rounds are.
+    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+                   N0.getNode()->getConstantOperandVal(1) == 1;
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getIntPtrConstant(IsTrunc));
+  }
+
+  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+                              N0.getOperand(0), N1);
+    AddToWorkList(Tmp.getNode());
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       Tmp, N0.getOperand(1));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+  // fold (fp_round_inreg c1fp) -> c1fp
+  if (N0CFP && (TLI.isTypeLegal(ExtVT) || !LegalTypes)) {
+    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), ExtVT);
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // If this fp_extend's only use is an fp_round, don't fold it; let the
+  // fp_round(fp_extend x) combine fold the pair away instead.
+  if (N->hasOneUse() &&
+      N->use_begin()->getOpcode() == ISD::FP_ROUND)
+    return SDValue();
+
+  // fold (fp_extend c1fp) -> c1fp
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
+  // value of X.
+  if (N0.getOpcode() == ISD::FP_ROUND
+      && N0.getNode()->getConstantOperandVal(1) == 1) {
+    SDValue In = N0.getOperand(0);
+    if (In.getValueType() == VT) return In;
+    if (VT.bitsLT(In.getValueType()))
+      return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+                         In, N0.getOperand(1));
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+  }
+
+  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+                                     LN0->getChain(),
+                                     LN0->getBasePtr(), LN0->getSrcValue(),
+                                     LN0->getSrcValueOffset(),
+                                     N0.getValueType(),
+                                     LN0->isVolatile(), LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(),
+              DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+              ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  if (isNegatibleForFree(N0, LegalOperations))
+    return GetNegatedExpression(N0, DAG, LegalOperations);
+
+  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+  // constant pool values.
+  if (N0.getOpcode() == ISD::BIT_CONVERT && 
+      !VT.isVector() &&
+      N0.getNode()->hasOneUse() &&
+      N0.getOperand(0).getValueType().isInteger()) {
+    SDValue Int = N0.getOperand(0);
+    EVT IntVT = Int.getValueType();
+    if (IntVT.isInteger() && !IntVT.isVector()) {
+      Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+              DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+      AddToWorkList(Int.getNode());
+      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                         VT, Int);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fabs c1) -> fabs(c1)
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+  // fold (fabs (fabs x)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FABS)
+    return N->getOperand(0);
+  // fold (fabs (fneg x)) -> (fabs x)
+  // fold (fabs (fcopysign x, y)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+  // constant pool values.
+  if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+      N0.getOperand(0).getValueType().isInteger() &&
+      !N0.getOperand(0).getValueType().isVector()) {
+    SDValue Int = N0.getOperand(0);
+    EVT IntVT = Int.getValueType();
+    if (IntVT.isInteger() && !IntVT.isVector()) {
+      Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+             DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+      AddToWorkList(Int.getNode());
+      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                         N->getValueType(0), Int);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+
+  // If the condition is a constant we could fold this into a fallthrough or
+  // branch. However that doesn't happen very often in normal code, because
+  // Instcombine/SimplifyCFG should have handled the available opportunities.
+  // If we did this folding here, it would be necessary to update the
+  // MachineBasicBlock CFG, which is awkward.
+
+  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+  // on the target.
+  if (N1.getOpcode() == ISD::SETCC &&
+      TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+                       Chain, N1.getOperand(2),
+                       N1.getOperand(0), N1.getOperand(1), N2);
+  }
+
+  SDNode *Trunc = 0;
+  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+    // Look past the truncate.
+    Trunc = N1.getNode();
+    N1 = N1.getOperand(0);
+  }
+
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
+    // Match this pattern so that we can generate simpler code:
+    //
+    //   %a = ...
+    //   %b = and i32 %a, 2
+    //   %c = srl i32 %b, 1
+    //   brcond i32 %c ...
+    //
+    // into
+    // 
+    //   %a = ...
+    //   %b = and i32 %a, 2
+    //   %c = setcc ne %b, 0
+    //   brcond %c ...
+    //
+    // This applies only when the AND constant value has one bit set and the
+    // SRL constant is equal to the log2 of the AND constant. The back-end is
+    // smart enough to convert the result into a TEST/JMP sequence.
+    SDValue Op0 = N1.getOperand(0);
+    SDValue Op1 = N1.getOperand(1);
+
+    if (Op0.getOpcode() == ISD::AND &&
+        Op1.getOpcode() == ISD::Constant) {
+      SDValue AndOp1 = Op0.getOperand(1);
+
+      if (AndOp1.getOpcode() == ISD::Constant) {
+        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+        if (AndConst.isPowerOf2() &&
+            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+          SDValue SetCC =
+            DAG.getSetCC(N->getDebugLoc(),
+                         TLI.getSetCCResultType(Op0.getValueType()),
+                         Op0, DAG.getConstant(0, Op0.getValueType()),
+                         ISD::SETNE);
+
+          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                                          MVT::Other, Chain, SetCC, N2);
+          // Don't add the new BRCond into the worklist or else SimplifySelectCC
+          // will convert it back to (X & C1) >> C2.
+          CombineTo(N, NewBRCond, false);
+          // Truncate is dead.
+          if (Trunc) {
+            removeFromWorkList(Trunc);
+            DAG.DeleteNode(Trunc);
+          }
+          // Replace the uses of SRL with SETCC
+          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+          removeFromWorkList(N1.getNode());
+          DAG.DeleteNode(N1.getNode());
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+  // If the condition is a constant we could fold this into a fallthrough or
+  // branch. However that doesn't happen very often in normal code, because
+  // Instcombine/SimplifyCFG should have handled the available opportunities.
+  // If we did this folding here, it would be necessary to update the
+  // MachineBasicBlock CFG, which is awkward.
+
+  // Use SimplifySetCC to simplify SETCC's.
+  SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
+                               CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
+                               false);
+  if (Simp.getNode()) AddToWorkList(Simp.getNode());
+
+  // fold to a simpler setcc
+  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+                       N->getOperand(0), Simp.getOperand(2),
+                       Simp.getOperand(0), Simp.getOperand(1),
+                       N->getOperand(4));
+
+  return SDValue();
+}
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+  if (!LegalOperations)
+    return false;
+
+  bool isLoad = true;
+  SDValue Ptr;
+  EVT VT;
+  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
+    if (LD->isIndexed())
+      return false;
+    VT = LD->getMemoryVT();
+    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
+    if (ST->isIndexed())
+      return false;
+    VT = ST->getMemoryVT();
+    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else {
+    return false;
+  }
+
+  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+  // out.  There is no reason to make this a preinc/predec.
+  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+      Ptr.getNode()->hasOneUse())
+    return false;
+
+  // Ask the target to do addressing mode selection.
+  SDValue BasePtr;
+  SDValue Offset;
+  ISD::MemIndexedMode AM = ISD::UNINDEXED;
+  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+    return false;
+  // Don't create an indexed load / store with zero offset.
+  if (isa<ConstantSDNode>(Offset) &&
+      cast<ConstantSDNode>(Offset)->isNullValue())
+    return false;
+
+  // Try turning it into a pre-indexed load / store except when:
+  // 1) The new base ptr is a frame index.
+  // 2) If N is a store and the new base ptr is either the same as or is a
+  //    predecessor of the value being stored.
+  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+  //    that would create a cycle.
+  // 4) All uses are load / store ops that use it as old base ptr.
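+  // (For example, on a target such as ARM a pre-indexed load looks like
+  //  "ldr r0, [r1, #4]!": r1 is updated as part of the load.)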
+
+  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
+  // (plus the implicit offset) to a register to preinc anyway.
+  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+    return false;
+
+  // Check #2.
+  if (!isLoad) {
+    SDValue Val = cast<StoreSDNode>(N)->getValue();
+    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+      return false;
+  }
+
+  // Now check for #3 and #4.
+  bool RealUse = false;
+  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+         E = Ptr.getNode()->use_end(); I != E; ++I) {
+    SDNode *Use = *I;
+    if (Use == N)
+      continue;
+    if (Use->isPredecessorOf(N))
+      return false;
+
+    if (!((Use->getOpcode() == ISD::LOAD &&
+           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+          (Use->getOpcode() == ISD::STORE &&
+           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+      RealUse = true;
+  }
+
+  if (!RealUse)
+    return false;
+
+  SDValue Result;
+  if (isLoad)
+    Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+                                BasePtr, Offset, AM);
+  else
+    Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+                                 BasePtr, Offset, AM);
+  ++PreIndexedNodes;
+  ++NodesCombined;
+  DEBUG(dbgs() << "\nReplacing.4 ";
+        N->dump(&DAG);
+        dbgs() << "\nWith: ";
+        Result.getNode()->dump(&DAG);
+        dbgs() << '\n');
+  WorkListRemover DeadNodes(*this);
+  if (isLoad) {
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+                                  &DeadNodes);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+                                  &DeadNodes);
+  } else {
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+                                  &DeadNodes);
+  }
+
+  // Finally, since the node is now dead, remove it from the graph.
+  DAG.DeleteNode(N);
+
+  // Replace the uses of Ptr with uses of the updated base value.
+  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+                                &DeadNodes);
+  removeFromWorkList(Ptr.getNode());
+  DAG.DeleteNode(Ptr.getNode());
+
+  return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation effectively folds the add / subtract into the new
+/// indexed load / store, and all of the add / subtract's other uses are
+/// redirected to the new load / store.
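+/// (For example, on a target such as ARM a post-indexed load looks like
+/// "ldr r0, [r1], #4": the load uses r1 and then updates it.)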
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+  if (!LegalOperations)
+    return false;
+
+  bool isLoad = true;
+  SDValue Ptr;
+  EVT VT;
+  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
+    if (LD->isIndexed())
+      return false;
+    VT = LD->getMemoryVT();
+    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
+    if (ST->isIndexed())
+      return false;
+    VT = ST->getMemoryVT();
+    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else {
+    return false;
+  }
+
+  if (Ptr.getNode()->hasOneUse())
+    return false;
+
+  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+         E = Ptr.getNode()->use_end(); I != E; ++I) {
+    SDNode *Op = *I;
+    if (Op == N ||
+        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+      continue;
+
+    SDValue BasePtr;
+    SDValue Offset;
+    ISD::MemIndexedMode AM = ISD::UNINDEXED;
+    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
+        std::swap(BasePtr, Offset);
+      if (Ptr != BasePtr)
+        continue;
+      // Don't create an indexed load / store with zero offset.
+      if (isa<ConstantSDNode>(Offset) &&
+          cast<ConstantSDNode>(Offset)->isNullValue())
+        continue;
+
+      // Try turning it into a post-indexed load / store except when
+      // 1) All uses are load / store ops that use it as base ptr.
+      // 2) Op must be independent of N, i.e. Op is neither a predecessor
+      //    nor a successor of N. Otherwise, if Op is folded that would
+      //    create a cycle.
+
+      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+        continue;
+
+      // Check for #1.
+      bool TryNext = false;
+      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+        SDNode *Use = *II;
+        if (Use == Ptr.getNode())
+          continue;
+
+        // If all the uses are load / store addresses, then don't do the
+        // transformation.
+        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+          bool RealUse = false;
+          for (SDNode::use_iterator III = Use->use_begin(),
+                 EEE = Use->use_end(); III != EEE; ++III) {
+            SDNode *UseUse = *III;
+            if (!((UseUse->getOpcode() == ISD::LOAD &&
+                   cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
+                  (UseUse->getOpcode() == ISD::STORE &&
+                   cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+              RealUse = true;
+          }
+
+          if (!RealUse) {
+            TryNext = true;
+            break;
+          }
+        }
+      }
+
+      if (TryNext)
+        continue;
+
+      // Check for #2
+      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+        SDValue Result = isLoad
+          ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+                               BasePtr, Offset, AM)
+          : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+                                BasePtr, Offset, AM);
+        ++PostIndexedNodes;
+        ++NodesCombined;
+        DEBUG(dbgs() << "\nReplacing.5 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Result.getNode()->dump(&DAG);
+              dbgs() << '\n');
+        WorkListRemover DeadNodes(*this);
+        if (isLoad) {
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+                                        &DeadNodes);
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+                                        &DeadNodes);
+        } else {
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+                                        &DeadNodes);
+        }
+
+        // Finally, since the node is now dead, remove it from the graph.
+        DAG.DeleteNode(N);
+
+        // Replace the uses of Use with uses of the updated base value.
+        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+                                      Result.getValue(isLoad ? 1 : 0),
+                                      &DeadNodes);
+        removeFromWorkList(Op);
+        DAG.DeleteNode(Op);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+  LoadSDNode *LD  = cast<LoadSDNode>(N);
+  SDValue Chain = LD->getChain();
+  SDValue Ptr   = LD->getBasePtr();
+
+  // Try to infer better alignment information than the load already has.
+  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > LD->getAlignment())
+        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+                              LD->getValueType(0),
+                              Chain, Ptr, LD->getSrcValue(),
+                              LD->getSrcValueOffset(), LD->getMemoryVT(),
+                              LD->isVolatile(), Align);
+    }
+  }
+
+  // If load is not volatile and there are no uses of the loaded value (and
+  // the updated indexed value in case of indexed loads), change uses of the
+  // chain value into uses of the chain input (i.e. delete the dead load).
+  if (!LD->isVolatile()) {
+    if (N->getValueType(1) == MVT::Other) {
+      // Unindexed loads.
+      if (N->hasNUsesOfValue(0, 0)) {
+        // It's not safe to use the two value CombineTo variant here. e.g.
+        // v1, chain2 = load chain1, loc
+        // v2, chain3 = load chain2, loc
+        // v3         = add v2, c
+        // Now we replace use of chain2 with chain1.  This makes the second load
+        // isomorphic to the one we are deleting, and thus makes this load live.
+        DEBUG(dbgs() << "\nReplacing.6 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith chain: ";
+              Chain.getNode()->dump(&DAG);
+              dbgs() << "\n");
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+
+        if (N->use_empty()) {
+          removeFromWorkList(N);
+          DAG.DeleteNode(N);
+        }
+
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    } else {
+      // Indexed loads.
+      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+        DEBUG(dbgs() << "\nReplacing.6 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Undef.getNode()->dump(&DAG);
+              dbgs() << " and 2 other values\n");
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+                                      DAG.getUNDEF(N->getValueType(1)),
+                                      &DeadNodes);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+        removeFromWorkList(N);
+        DAG.DeleteNode(N);
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    }
+  }
+
+  // If this load is directly stored, replace the load value with the stored
+  // value.
+  // TODO: Handle store large -> read small portion.
+  // TODO: Handle TRUNCSTORE/LOADEXT
+  if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
+      !LD->isVolatile()) {
+    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+      if (PrevST->getBasePtr() == Ptr &&
+          PrevST->getValue().getValueType() == N->getValueType(0))
+        return CombineTo(N, Chain.getOperand(1), Chain);
+    }
+  }
+
+  if (CombinerAA) {
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDValue BetterChain = FindBetterChain(N, Chain);
+
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      SDValue ReplLoad;
+
+      // Replace the chain to avoid a dependency.
+      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+                               BetterChain, Ptr,
+                               LD->getSrcValue(), LD->getSrcValueOffset(),
+                               LD->isVolatile(), LD->getAlignment());
+      } else {
+        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+                                  LD->getValueType(0),
+                                  BetterChain, Ptr, LD->getSrcValue(),
+                                  LD->getSrcValueOffset(),
+                                  LD->getMemoryVT(),
+                                  LD->isVolatile(),
+                                  LD->getAlignment());
+      }
+
+      // Create token factor to keep old chain connected.
+      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                                  MVT::Other, Chain, ReplLoad.getValue(1));
+      
+      // Make sure the new and old chains are cleaned up.
+      AddToWorkList(Token.getNode());
+      
+      // Replace uses with load result and token factor. Don't add users
+      // to work list.
+      return CombineTo(N, ReplLoad.getValue(0), Token, false);
+    }
+  }
+
+  // Try transforming N to an indexed load.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
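+///
+/// Illustrative example (little-endian, assuming an i8 'or' is legal and
+/// narrowing is profitable on the target):
+///   store (or (load i32 p), 0xFF00), p
+/// only changes byte 1 of the value, so it can be narrowed to
+///   store (or (load i8 p+1), 0xFF), p+1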
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  if (ST->isVolatile())
+    return SDValue();
+
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  SDValue Ptr   = ST->getBasePtr();
+  EVT VT = Value.getValueType();
+
+  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+    return SDValue();
+
+  unsigned Opc = Value.getOpcode();
+  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+      Value.getOperand(1).getOpcode() != ISD::Constant)
+    return SDValue();
+
+  SDValue N0 = Value.getOperand(0);
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+    LoadSDNode *LD = cast<LoadSDNode>(N0);
+    if (LD->getBasePtr() != Ptr)
+      return SDValue();
+
+    // Find the type to narrow the load / op / store to.
+    SDValue N1 = Value.getOperand(1);
+    unsigned BitWidth = N1.getValueSizeInBits();
+    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+    if (Opc == ISD::AND)
+      Imm ^= APInt::getAllOnesValue(BitWidth);
+    if (Imm == 0 || Imm.isAllOnesValue())
+      return SDValue();
+    unsigned ShAmt = Imm.countTrailingZeros();
+    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+    while (NewBW < BitWidth &&
+           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+             TLI.isNarrowingProfitable(VT, NewVT))) {
+      NewBW = NextPowerOf2(NewBW);
+      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+    }
+    if (NewBW >= BitWidth)
+      return SDValue();
+
+    // If the changed lsb does not start at a multiple of the narrow width,
+    // back up to the previous boundary.
+    if (ShAmt % NewBW)
+      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+    if ((Imm & Mask) == Imm) {
+      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+      if (Opc == ISD::AND)
+        NewImm ^= APInt::getAllOnesValue(NewBW);
+      uint64_t PtrOff = ShAmt / 8;
+      // For big endian targets, we need to adjust the offset to the pointer to
+      // load the correct bytes.
+      if (TLI.isBigEndian())
+        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+      if (NewAlign <
+          TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
+        return SDValue();
+
+      SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+                                   Ptr.getValueType(), Ptr,
+                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
+      SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+                                  LD->getChain(), NewPtr,
+                                  LD->getSrcValue(), LD->getSrcValueOffset(),
+                                  LD->isVolatile(), NewAlign);
+      SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+                                   DAG.getConstant(NewImm, NewVT));
+      SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+                                   NewVal, NewPtr,
+                                   ST->getSrcValue(), ST->getSrcValueOffset(),
+                                   false, NewAlign);
+
+      AddToWorkList(NewPtr.getNode());
+      AddToWorkList(NewLD.getNode());
+      AddToWorkList(NewVal.getNode());
+      WorkListRemover DeadNodes(*this);
+      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+                                    &DeadNodes);
+      ++OpsNarrowed;
+      return NewST;
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  SDValue Ptr   = ST->getBasePtr();
+
+  // Try to infer better alignment information than the store already has.
+  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > ST->getAlignment())
+        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+                                 Ptr, ST->getSrcValue(),
+                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
+                                 ST->isVolatile(), Align);
+    }
+  }
+
+  // If this is a store of a bit convert, store the input value if the
+  // resultant store does not need a higher alignment than the original.
+  if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+      ST->isUnindexed()) {
+    unsigned OrigAlign = ST->getAlignment();
+    EVT SVT = Value.getOperand(0).getValueType();
+    unsigned Align = TLI.getTargetData()->
+      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+    if (Align <= OrigAlign &&
+        ((!LegalOperations && !ST->isVolatile()) ||
+         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
+      return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+                          Ptr, ST->getSrcValue(),
+                          ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign);
+  }
+
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+    // NOTE: If the original store is volatile, this transform must not increase
+    // the number of stores.  For example, on x86-32 an f64 can be stored in one
+    // processor operation but an i64 (which is not legal) requires two.  So the
+    // transform should not be done in this case.
+    if (Value.getOpcode() != ISD::TargetConstantFP) {
+      SDValue Tmp;
+      switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+      default: llvm_unreachable("Unknown FP type");
+      case MVT::f80:    // We don't do this for these yet.
+      case MVT::f128:
+      case MVT::ppcf128:
+        break;
+      case MVT::f32:
+        if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations &&
+             !ST->isVolatile()) ||
+            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+                              bitcastToAPInt().getZExtValue(), MVT::i32);
+          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+                              Ptr, ST->getSrcValue(),
+                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              ST->getAlignment());
+        }
+        break;
+      case MVT::f64:
+        if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations &&
+             !ST->isVolatile()) ||
+            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+                                getZExtValue(), MVT::i64);
+          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+                              Ptr, ST->getSrcValue(),
+                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              ST->getAlignment());
+        } else if (!ST->isVolatile() &&
+                   TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+          // Many FP stores are not made apparent until after legalize, e.g. for
+          // argument passing.  Since this is so common, custom legalize the
+          // 64-bit integer store into two 32-bit stores.
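+          // e.g. storing double 1.0 (bits 0x3FF0000000000000) becomes
+          // Lo = 0x00000000 and Hi = 0x3FF00000 (swapped for big-endian).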
+          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+          if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+          int SVOffset = ST->getSrcValueOffset();
+          unsigned Alignment = ST->getAlignment();
+          bool isVolatile = ST->isVolatile();
+
+          SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+                                     Ptr, ST->getSrcValue(),
+                                     ST->getSrcValueOffset(),
+                                     isVolatile, ST->getAlignment());
+          Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+                            DAG.getConstant(4, Ptr.getValueType()));
+          SVOffset += 4;
+          Alignment = MinAlign(Alignment, 4U);
+          SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+                                     Ptr, ST->getSrcValue(),
+                                     SVOffset, isVolatile, Alignment);
+          return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+                             St0, St1);
+        }
+
+        break;
+      }
+    }
+  }
+
+  if (CombinerAA) {
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDValue BetterChain = FindBetterChain(N, Chain);
+
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      SDValue ReplStore;
+
+      // Replace the chain to avoid dependency.
+      if (ST->isTruncatingStore()) {
+        ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+                                      ST->getSrcValue(),ST->getSrcValueOffset(),
+                                      ST->getMemoryVT(),
+                                      ST->isVolatile(), ST->getAlignment());
+      } else {
+        ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+                                 ST->getSrcValue(), ST->getSrcValueOffset(),
+                                 ST->isVolatile(), ST->getAlignment());
+      }
+
+      // Create token to keep both nodes around.
+      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                                  MVT::Other, Chain, ReplStore);
+
+      // Make sure the new and old chains are cleaned up.
+      AddToWorkList(Token.getNode());
+
+      // Don't add users to work list.
+      return CombineTo(N, Token, false);
+    }
+  }
+
+  // Try transforming N to an indexed store.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDValue(N, 0);
+
+  // FIXME: is there such a thing as a truncating indexed store?
+  if (ST->isTruncatingStore() && ST->isUnindexed() &&
+      Value.getValueType().isInteger()) {
+    // See if we can simplify the input to this truncstore with knowledge that
+    // only the low bits are being used.  For example:
+    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
+    SDValue Shorter =
+      GetDemandedBits(Value,
+                      APInt::getLowBitsSet(Value.getValueSizeInBits(),
+                                           ST->getMemoryVT().getSizeInBits()));
+    AddToWorkList(Value.getNode());
+    if (Shorter.getNode())
+      return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
+                               Ptr, ST->getSrcValue(),
+                               ST->getSrcValueOffset(), ST->getMemoryVT(),
+                               ST->isVolatile(), ST->getAlignment());
+
+    // Otherwise, see if we can simplify the operation with
+    // SimplifyDemandedBits, which only works if the value has a single use.
+    if (SimplifyDemandedBits(Value,
+                             APInt::getLowBitsSet(
+                               Value.getValueType().getScalarType().getSizeInBits(),
+                               ST->getMemoryVT().getSizeInBits())))
+      return SDValue(N, 0);
+  }
+
+  // If this is a load followed by a store to the same location, then the store
+  // is dead/noop.
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+        ST->isUnindexed() && !ST->isVolatile() &&
+        // There can't be any side effects between the load and store, such as
+        // a call or store.
+        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+      // The store is dead, remove it.
+      return Chain;
+    }
+  }
+
+  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+  // truncating store.  We can do this even if this is already a truncstore.
+  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+                            ST->getMemoryVT())) {
+    return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+                             Ptr, ST->getSrcValue(),
+                             ST->getSrcValueOffset(), ST->getMemoryVT(),
+                             ST->isVolatile(), ST->getAlignment());
+  }
+
+  return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+  SDValue InVec = N->getOperand(0);
+  SDValue InVal = N->getOperand(1);
+  SDValue EltNo = N->getOperand(2);
+
+  // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+  // vector with the inserted element.
+  if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
+                                InVec.getNode()->op_end());
+    if (Elt < Ops.size())
+      Ops[Elt] = InVal;
+    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                       InVec.getValueType(), &Ops[0], Ops.size());
+  }
+  // If the invec is an UNDEF and if EltNo is a constant, create a new 
+  // BUILD_VECTOR with undef elements and the inserted element.
+  if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && 
+      isa<ConstantSDNode>(EltNo)) {
+    EVT VT = InVec.getValueType();
+    EVT EltVT = VT.getVectorElementType();
+    unsigned NElts = VT.getVectorNumElements();
+    SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
+
+    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    if (Elt < Ops.size())
+      Ops[Elt] = InVal;
+    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                       InVec.getValueType(), &Ops[0], Ops.size());
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+  // (vextract (scalar_to_vector val), 0) -> val
+  SDValue InVec = N->getOperand(0);
+
+  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    // Check if the result type doesn't match the inserted element type. A
+    // SCALAR_TO_VECTOR may truncate the inserted element and the
+    // EXTRACT_VECTOR_ELT may widen the extracted vector.
+    EVT EltVT = InVec.getValueType().getVectorElementType();
+    SDValue InOp = InVec.getOperand(0);
+    EVT NVT = N->getValueType(0);
+    if (InOp.getValueType() != NVT) {
+      assert(InOp.getValueType().isInteger() && NVT.isInteger());
+      return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+    }
+    return InOp;
+  }
+
+  // Perform only after legalization to ensure build_vector / vector_shuffle
+  // optimizations have already been done.
+  if (!LegalOperations) return SDValue();
+
+  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+  SDValue EltNo = N->getOperand(1);
+
+  if (isa<ConstantSDNode>(EltNo)) {
+    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    bool NewLoad = false;
+    bool BCNumEltsChanged = false;
+    EVT VT = InVec.getValueType();
+    EVT ExtVT = VT.getVectorElementType();
+    EVT LVT = ExtVT;
+
+    if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+      EVT BCVT = InVec.getOperand(0).getValueType();
+      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+        return SDValue();
+      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+        BCNumEltsChanged = true;
+      InVec = InVec.getOperand(0);
+      ExtVT = BCVT.getVectorElementType();
+      NewLoad = true;
+    }
+
+    LoadSDNode *LN0 = NULL;
+    const ShuffleVectorSDNode *SVN = NULL;
+    if (ISD::isNormalLoad(InVec.getNode())) {
+      LN0 = cast<LoadSDNode>(InVec);
+    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+               InVec.getOperand(0).getValueType() == ExtVT &&
+               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+      // =>
+      // (load $addr+1*size)
+
+      // If the bit convert changed the number of elements, it is unsafe
+      // to examine the mask.
+      if (BCNumEltsChanged)
+        return SDValue();
+
+      // Select the input vector, guarding against an out-of-range element index.
+      unsigned NumElems = VT.getVectorNumElements();
+      int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
+      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+      if (InVec.getOpcode() == ISD::BIT_CONVERT)
+        InVec = InVec.getOperand(0);
+      if (ISD::isNormalLoad(InVec.getNode())) {
+        LN0 = cast<LoadSDNode>(InVec);
+        Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems;
+      }
+    }
+
+    if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
+      return SDValue();
+
+    unsigned Align = LN0->getAlignment();
+    if (NewLoad) {
+      // Check the resultant load doesn't need a higher alignment than the
+      // original load.
+      unsigned NewAlign =
+        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+        return SDValue();
+
+      Align = NewAlign;
+    }
+
+    SDValue NewPtr = LN0->getBasePtr();
+    if (Elt) {
+      unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+      EVT PtrType = NewPtr.getValueType();
+      if (TLI.isBigEndian())
+        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+      NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+                           DAG.getConstant(PtrOff, PtrType));
+    }
+
+    return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+                       LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                       LN0->isVolatile(), Align);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  unsigned NumInScalars = N->getNumOperands();
+  EVT VT = N->getValueType(0);
+
+  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+  // at most two distinct vectors, turn this into a shuffle node.
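+  //
+  // For example (illustrative), with A and B of type v4i32:
+  //   (build_vector (extract_elt A, 0), (extract_elt A, 1),
+  //                 (extract_elt B, 0), (extract_elt B, 1))
+  // becomes (vector_shuffle A, B, <0, 1, 4, 5>), where mask entries of 4 or
+  // more select from the second input vector.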
+  SDValue VecIn1, VecIn2;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    // Ignore undef inputs.
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    // If this input is something other than an EXTRACT_VECTOR_ELT with a
+    // constant index, bail out.
+    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+
+    // If the input vector type disagrees with the result of the build_vector,
+    // we can't make a shuffle.
+    SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+    if (ExtractedFromVec.getValueType() != VT) {
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+
+    // Otherwise, remember this.  We allow up to two distinct input vectors.
+    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+      continue;
+
+    if (VecIn1.getNode() == 0) {
+      VecIn1 = ExtractedFromVec;
+    } else if (VecIn2.getNode() == 0) {
+      VecIn2 = ExtractedFromVec;
+    } else {
+      // Too many inputs.
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+  }
+
+  // If everything is good, we can make a shuffle operation.
+  if (VecIn1.getNode()) {
+    SmallVector<int, 8> Mask;
+    for (unsigned i = 0; i != NumInScalars; ++i) {
+      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+        Mask.push_back(-1);
+        continue;
+      }
+
+      // If extracting from the first vector, just use the index directly.
+      SDValue Extract = N->getOperand(i);
+      SDValue ExtVal = Extract.getOperand(1);
+      if (Extract.getOperand(0) == VecIn1) {
+        unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+        if (ExtIndex >= VT.getVectorNumElements())
+          return SDValue();
+        
+        Mask.push_back(ExtIndex);
+        continue;
+      }
+
+      // Otherwise, the element comes from the second vector; use its index
+      // plus the number of elements in the first vector.
+      unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+      Mask.push_back(Idx+NumInScalars);
+    }
+
+    // If the type of the shuffle result is not legal and types have already
+    // been legalized, we cannot produce the shuffle; bail out.
+    if (!TLI.isTypeLegal(VT) && LegalTypes)
+      return SDValue();
+
+    // Return the new VECTOR_SHUFFLE node.
+    SDValue Ops[2];
+    Ops[0] = VecIn1;
+    Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+    return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+  // EXTRACT_SUBVECTOR operations.  If so, and if the EXTRACT_SUBVECTOR vector
+  // inputs come from at most two distinct vectors, turn this into a shuffle
+  // node.
+
+  // If we only have one input vector, we don't need to do any concatenation.
+  if (N->getNumOperands() == 1)
+    return N->getOperand(0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+  // This combine is disabled for now; everything below is currently
+  // unreachable.
+  return SDValue();
+
+  EVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SDValue N0 = N->getOperand(0);
+
+  assert(N0.getValueType().getVectorNumElements() == NumElts &&
+        "Vector shuffle must be normalized in DAG");
+
+  // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+
+  // If it is a splat, check if the argument vector is a build_vector with
+  // all scalar elements the same.
+  if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+    SDNode *V = N0.getNode();
+
+    // If this is a bit convert that changes the element type of the vector but
+    // not the number of vector elements, look through it.  Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+    if (V->getOpcode() == ISD::BIT_CONVERT) {
+      SDValue ConvInput = V->getOperand(0);
+      if (ConvInput.getValueType().isVector() &&
+          ConvInput.getValueType().getVectorNumElements() == NumElts)
+        V = ConvInput.getNode();
+    }
+
+    if (V->getOpcode() == ISD::BUILD_VECTOR) {
+      unsigned NumElems = V->getNumOperands();
+      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
+      if (NumElems > BaseIdx) {
+        SDValue Base;
+        bool AllSame = true;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+            Base = V->getOperand(i);
+            break;
+          }
+        }
+        // Splat of <u, u, u, u>, return <u, u, u, u>
+        if (!Base.getNode())
+          return N0;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i) != Base) {
+            AllSame = false;
+            break;
+          }
+        }
+        // Splat of <x, x, x, x>, return <x, x, x, x>
+        if (AllSame)
+          return N0;
+      }
+    }
+  }
+  return SDValue();
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
+/// transform an AND to a vector_shuffle with the destination vector and a
+/// zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+///      vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  if (N->getOpcode() == ISD::AND) {
+    if (RHS.getOpcode() == ISD::BIT_CONVERT)
+      RHS = RHS.getOperand(0);
+    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<int, 8> Indices;
+      unsigned NumElts = RHS.getNumOperands();
+      for (unsigned i = 0; i != NumElts; ++i) {
+        SDValue Elt = RHS.getOperand(i);
+        if (!isa<ConstantSDNode>(Elt))
+          return SDValue();
+        else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+          Indices.push_back(i);
+        else if (cast<ConstantSDNode>(Elt)->isNullValue())
+          Indices.push_back(NumElts);
+        else
+          return SDValue();
+      }
+
+      // Let's see if the target supports this vector_shuffle.
+      EVT RVT = RHS.getValueType();
+      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+        return SDValue();
+
+      // Return the new VECTOR_SHUFFLE node.
+      EVT EltVT = RVT.getVectorElementType();
+      SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+                                     DAG.getConstant(0, EltVT));
+      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                                 RVT, &ZeroOps[0], ZeroOps.size());
+      LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+    }
+  }
+
+  return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+  // After legalize, the target may be depending on adds and other
+  // binary ops to provide legal ways to construct constants or other
+  // things. Simplifying them may result in a loss of legality.
+  if (LegalOperations) return SDValue();
+
+  EVT VT = N->getValueType(0);
+  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+
+  EVT EltType = VT.getVectorElementType();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue Shuffle = XformToShuffleWithZero(N);
+  if (Shuffle.getNode()) return Shuffle;
+
+  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+  // this operation.
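+  // For example (illustrative):
+  //   (add (build_vector 1, 2, 3, 4), (build_vector 10, 20, 30, 40))
+  // folds to (build_vector 11, 22, 33, 44).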
+  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+      RHS.getOpcode() == ISD::BUILD_VECTOR) {
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+      SDValue LHSOp = LHS.getOperand(i);
+      SDValue RHSOp = RHS.getOperand(i);
+      // If these two elements can't be folded, bail out.
+      if ((LHSOp.getOpcode() != ISD::UNDEF &&
+           LHSOp.getOpcode() != ISD::Constant &&
+           LHSOp.getOpcode() != ISD::ConstantFP) ||
+          (RHSOp.getOpcode() != ISD::UNDEF &&
+           RHSOp.getOpcode() != ISD::Constant &&
+           RHSOp.getOpcode() != ISD::ConstantFP))
+        break;
+
+      // Can't fold divide by zero.
+      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+          N->getOpcode() == ISD::FDIV) {
+        if ((RHSOp.getOpcode() == ISD::Constant &&
+             cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+            (RHSOp.getOpcode() == ISD::ConstantFP &&
+             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+          break;
+      }
+
+      Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
+                                EltType, LHSOp, RHSOp));
+      AddToWorkList(Ops.back().getNode());
+      assert((Ops.back().getOpcode() == ISD::UNDEF ||
+              Ops.back().getOpcode() == ISD::Constant ||
+              Ops.back().getOpcode() == ISD::ConstantFP) &&
+             "Scalar binop didn't fold!");
+    }
+
+    if (Ops.size() == LHS.getNumOperands()) {
+      EVT VT = LHS.getValueType();
+      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+                         &Ops[0], Ops.size());
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
+                                    SDValue N1, SDValue N2){
+  assert(N0.getOpcode() == ISD::SETCC &&
+         "First argument must be a SetCC node!");
+
+  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+  // If we got a simplified select_cc node back from SimplifySelectCC, then
+  // break it down into a new SETCC node, and a new SELECT node, and then return
+  // the SELECT node, since we were called with a SELECT node.
+  if (SCC.getNode()) {
+    // Check to see if we got a select_cc back (to turn into setcc/select).
+    // Otherwise, just return whatever node we got back, like fabs.
+    if (SCC.getOpcode() == ISD::SELECT_CC) {
+      SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+                                  N0.getValueType(),
+                                  SCC.getOperand(0), SCC.getOperand(1),
+                                  SCC.getOperand(4));
+      AddToWorkList(SETCC.getNode());
+      return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+                         SCC.getOperand(2), SCC.getOperand(3), SETCC);
+    }
+
+    return SCC;
+  }
+  return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select.  Callers of this should assume that TheSelect is deleted if this
+/// returns true.  As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+                                    SDValue RHS) {
+
+  // If this is a select from two identical things, try to pull the operation
+  // through the select.
+  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
+    // If this is a load and the token chain is identical, replace the select
+    // of two loads with a load through a select of the address to load from.
+    // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+    // constants have been dropped into the constant pool.
+    if (LHS.getOpcode() == ISD::LOAD &&
+        // Do not let this transformation reduce the number of volatile loads.
+        !cast<LoadSDNode>(LHS)->isVolatile() &&
+        !cast<LoadSDNode>(RHS)->isVolatile() &&
+        // Token chains must be identical.
+        LHS.getOperand(0) == RHS.getOperand(0)) {
+      LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+      LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+      // If this is an EXTLOAD, the VTs must match.
+      if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+        // FIXME: this discards src value information.  This is
+        // over-conservative. It would be beneficial to be able to remember
+        // both potential memory locations.  Since we are discarding
+        // src value info, don't do the transformation if the memory
+        // locations are not in the default address space.
+        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
+        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
+          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
+            LLDAddrSpace = PT->getAddressSpace();
+        }
+        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
+          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
+            RLDAddrSpace = PT->getAddressSpace();
+        }
+        SDValue Addr;
+        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
+          if (TheSelect->getOpcode() == ISD::SELECT) {
+            // Check that the condition doesn't reach either load.  If so, folding
+            // this will induce a cycle into the DAG.
+            if ((!LLD->hasAnyUseOfValue(1) ||
+                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+                (!RLD->hasAnyUseOfValue(1) ||
+                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
+              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+                                 LLD->getBasePtr().getValueType(),
+                                 TheSelect->getOperand(0), LLD->getBasePtr(),
+                                 RLD->getBasePtr());
+            }
+          } else {
+            // Check that the condition doesn't reach either load.  If so, folding
+            // this will induce a cycle into the DAG.
+            if ((!LLD->hasAnyUseOfValue(1) ||
+                 (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+                  !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
+                (!RLD->hasAnyUseOfValue(1) ||
+                 (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+                  !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
+              Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+                                 LLD->getBasePtr().getValueType(),
+                                 TheSelect->getOperand(0),
+                                 TheSelect->getOperand(1),
+                                 LLD->getBasePtr(), RLD->getBasePtr(),
+                                 TheSelect->getOperand(4));
+            }
+          }
+        }
+
+        if (Addr.getNode()) {
+          SDValue Load;
+          if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+            Load = DAG.getLoad(TheSelect->getValueType(0),
+                               TheSelect->getDebugLoc(),
+                               LLD->getChain(),
+                               Addr, 0, 0,
+                               LLD->isVolatile(),
+                               LLD->getAlignment());
+          } else {
+            Load = DAG.getExtLoad(LLD->getExtensionType(),
+                                  TheSelect->getDebugLoc(),
+                                  TheSelect->getValueType(0),
+                                  LLD->getChain(), Addr, 0, 0,
+                                  LLD->getMemoryVT(),
+                                  LLD->isVolatile(),
+                                  LLD->getAlignment());
+          }
+
+          // Users of the select now use the result of the load.
+          CombineTo(TheSelect, Load);
+
+          // Users of the old loads now use the new load's chain.  We know the
+          // old-load value is dead now.
+          CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+          CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+          return true;
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
+                                      SDValue N2, SDValue N3,
+                                      ISD::CondCode CC, bool NotExtCompare) {
+  // (x ? y : y) -> y.
+  if (N2 == N3) return N2;
+  
+  EVT VT = N2.getValueType();
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+  // Determine if the condition we're dealing with is constant
+  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+                              N0, N1, CC, DL, false);
+  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+  // fold select_cc true, x, y -> x
+  if (SCCC && !SCCC->isNullValue())
+    return N2;
+  // fold select_cc false, x, y -> y
+  if (SCCC && SCCC->isNullValue())
+    return N3;
+
+  // Check to see if we can simplify the select into an fabs node
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+    // Allow either -0.0 or 0.0
+    if (CFP->getValueAPF().isZero()) {
+      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+          N2 == N3.getOperand(0))
+        return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+          N2.getOperand(0) == N3)
+        return DAG.getNode(ISD::FABS, DL, VT, N3);
+    }
+  }
+  
+  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
+  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+  // in it.  This is a win when the constant is not otherwise available because
+  // it replaces two constant pool loads with one.  We only do this if the FP
+  // type is known to be legal, because if it isn't, then we are before legalize
+  // types an we want the other legalization to happen first (e.g. to avoid
+  // messing with soft float) and if the ConstantFP is not legal, because if
+  // it is legal, we may not need to store the FP constant in a constant pool.
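+  //
+  // For example (illustrative), "(a < b) ? 1.0f : 2.0f" becomes a two-element
+  // constant pool array {2.0f, 1.0f} (the false value first, since offset 0
+  // selects it), an offset computed as "(a < b) ? 4 : 0", and a single f32
+  // load from the array base plus that offset.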
+  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+      if (TLI.isTypeLegal(N2.getValueType()) &&
+          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+           TargetLowering::Legal) &&
+          // If both constants have multiple uses, then we won't need to do an
+          // extra load; they are likely around in registers for other users.
+          (TV->hasOneUse() || FV->hasOneUse())) {
+        Constant *Elts[] = {
+          const_cast<ConstantFP*>(FV->getConstantFPValue()),
+          const_cast<ConstantFP*>(TV->getConstantFPValue())
+        };
+        const Type *FPTy = Elts[0]->getType();
+        const TargetData &TD = *TLI.getTargetData();
+        
+        // Create a ConstantArray of the two constants.
+        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+                                            TD.getPrefTypeAlignment(FPTy));
+        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+        // Get the offsets to the 0 and 1 element of the array so that we can
+        // select between them.
+        SDValue Zero = DAG.getIntPtrConstant(0);
+        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+        SDValue One = DAG.getIntPtrConstant(EltSize);
+        
+        SDValue Cond = DAG.getSetCC(DL,
+                                    TLI.getSetCCResultType(N0.getValueType()),
+                                    N0, N1, CC);
+        SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+                                        Cond, One, Zero);
+        CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+                            CstOffset);
+        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+                           PseudoSourceValue::getConstantPool(), 0, false,
+                           Alignment);
+
+      }
+    }  
+
+  // Check to see if we can perform the "gzip trick", transforming
+  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
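+  //
+  // For example (illustrative), for i32 this turns
+  //   (select_cc setlt X, 0, A, 0)
+  // into (and (sra X, 31), A): the arithmetic shift produces all ones when
+  // X < 0 and zero otherwise, so the AND either passes A through or clears it.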
+  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+      N0.getValueType().isInteger() &&
+      N2.getValueType().isInteger() &&
+      (N1C->isNullValue() ||                         // (a < 0) ? b : 0
+       (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
+    EVT XType = N0.getValueType();
+    EVT AType = N2.getValueType();
+    if (XType.bitsGE(AType)) {
+      // (and (sra X, size(X)-1), A) -> "and (srl X, C2), A" iff A is a
+      // single-bit constant.
+      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+        unsigned ShCtV = N2C->getAPIntValue().logBase2();
+        ShCtV = XType.getSizeInBits()-ShCtV-1;
+        SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
+        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+                                    XType, N0, ShCt);
+        AddToWorkList(Shift.getNode());
+
+        if (XType.bitsGT(AType)) {
+          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+          AddToWorkList(Shift.getNode());
+        }
+
+        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+      }
+
+      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+                                  XType, N0,
+                                  DAG.getConstant(XType.getSizeInBits()-1,
+                                                  getShiftAmountTy()));
+      AddToWorkList(Shift.getNode());
+
+      if (XType.bitsGT(AType)) {
+        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+        AddToWorkList(Shift.getNode());
+      }
+
+      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+    }
+  }
+
+  // fold select C, 16, 0 -> shl C, 4
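+  // For example (illustrative), with 0/1 booleans (select C, 16, 0) becomes
+  // (shl (zext C), 4), trading the select for a zero extend and a shift.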
+  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+      TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+
+    // If the caller doesn't want us to simplify this into a zext of a compare,
+    // don't do it.
+    if (NotExtCompare && N2C->getAPIntValue() == 1)
+      return SDValue();
+
+    // Get a SetCC of the condition
+    // FIXME: Should probably make sure that setcc is legal if we ever have a
+    // target where it isn't.
+    SDValue Temp, SCC;
+    // cast from setcc result type to select result type
+    if (LegalTypes) {
+      SCC  = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+                          N0, N1, CC);
+      if (N2.getValueType().bitsLT(SCC.getValueType()))
+        Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
+      else
+        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+                           N2.getValueType(), SCC);
+    } else {
+      SCC  = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+                         N2.getValueType(), SCC);
+    }
+
+    AddToWorkList(SCC.getNode());
+    AddToWorkList(Temp.getNode());
+
+    if (N2C->getAPIntValue() == 1)
+      return Temp;
+
+    // shl setcc result by log2 n2c
+    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
+                                       getShiftAmountTy()));
+  }
+
+  // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal;
+  // otherwise, go ahead with the folds.
+  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+    EVT XType = N0.getValueType();
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+      SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+      if (Res.getValueType() != VT)
+        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+      return Res;
+    }
+
+    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(ISD::CTLZ, XType))) {
+      SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
+                                         getShiftAmountTy()));
+    }
+    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+      SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+                                  XType, DAG.getConstant(0, XType), N0);
+      SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+      return DAG.getNode(ISD::SRL, DL, XType,
+                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+                         DAG.getConstant(XType.getSizeInBits()-1,
+                                         getShiftAmountTy()));
+    }
+    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+      SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+                                 DAG.getConstant(XType.getSizeInBits()-1,
+                                                 getShiftAmountTy()));
+      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+    }
+  }
+
+  // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
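+  //
+  // For example (illustrative), for i32 with X = -5: Y = sra(X, 31) = -1,
+  // add(X, Y) = -6, and xor(-6, -1) = 5 = |X|.  For X >= 0, Y = 0 and the
+  // sequence is the identity.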
+  if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
+      N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
+      N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
+    EVT XType = N0.getValueType();
+    SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
+                                DAG.getConstant(XType.getSizeInBits()-1,
+                                                getShiftAmountTy()));
+    SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
+                              N0, Shift);
+    AddToWorkList(Shift.getNode());
+    AddToWorkList(Add.getNode());
+    return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+  }
+  // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
+      N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
+    if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
+      EVT XType = N0.getValueType();
+      if (SubC->isNullValue() && XType.isInteger()) {
+        SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+                                    N0,
+                                    DAG.getConstant(XType.getSizeInBits()-1,
+                                                    getShiftAmountTy()));
+        SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+                                  XType, N0, Shift);
+        AddToWorkList(Shift.getNode());
+        AddToWorkList(Add.getNode());
+        return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
+                                   SDValue N1, ISD::CondCode Cond,
+                                   DebugLoc DL, bool foldBooleans) {
+  TargetLowering::DAGCombinerInfo
+    DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
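+///
+/// A sketch of the idea (the actual magic constant and fixup sequence are
+/// computed by TLI.BuildSDIV): a signed i32 divide by 3 can be selected as
+///   Q = mulhs(X, 0x55555556);  // high 32 bits of the 64-bit product
+///   Q = Q + srl(X, 31);        // round toward zero for negative X
+/// avoiding an expensive divide instruction.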
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
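+///
+/// A sketch of the idea (again, the constant and shift come from
+/// TLI.BuildUDIV): an unsigned i32 divide by 3 can be selected as
+///   Q = srl(mulhu(X, 0xAAAAAAAB), 1);  // (X * ceil(2^33 / 3)) >> 33
+/// replacing the udiv with a multiply and a shift.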
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself.  Provides base object and offset as
+/// results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+                           GlobalValue *&GV, void *&CV) {
+  // Assume it is a primitive operation.
+  Base = Ptr; Offset = 0; GV = 0; CV = 0;
+
+  // If it's adding a simple constant then integrate the offset.
+  if (Base.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+      Base = Base.getOperand(0);
+      Offset += C->getZExtValue();
+    }
+  }
+  
+  // Return the underlying GlobalValue, and update the Offset.  Return false
+  // for GlobalAddressSDNode since the same GlobalAddress may be represented
+  // by multiple nodes with different offsets.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+    GV = G->getGlobal();
+    Offset += G->getOffset();
+    return false;
+  }
+
+  // Return the underlying Constant value, and update the Offset.  Return false
+  // for ConstantSDNodes since the same constant pool entry may be represented
+  // by multiple nodes with different offsets.
+  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+    CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+                                         : (void *)C->getConstVal();
+    Offset += C->getOffset();
+    return false;
+  }
+  // If it's a frame index, it can't alias with anything but itself.
+  return isa<FrameIndexSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+                          const Value *SrcValue1, int SrcValueOffset1,
+                          unsigned SrcValueAlign1,
+                          SDValue Ptr2, int64_t Size2,
+                          const Value *SrcValue2, int SrcValueOffset2,
+                          unsigned SrcValueAlign2) const {
+  // If they are the same then they must be aliases.
+  if (Ptr1 == Ptr2) return true;
+
+  // Gather base node and offset information.
+  SDValue Base1, Base2;
+  int64_t Offset1, Offset2;
+  GlobalValue *GV1, *GV2;
+  void *CV1, *CV2;
+  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+
+  // If they have the same base address, then check to see if they overlap.
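+  // For example (illustrative): from the same base, an 8-byte access at
+  // offset 0 and a 4-byte access at offset 8 do not overlap, since
+  // 0 + 8 <= 8 -- the first access ends where the second begins.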
+  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+  // If we know what the bases are, and they aren't identical, then we know they
+  // cannot alias.
+  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+    return false;
+
+  // If SrcValue1 and SrcValue2 are known to have relatively large alignment
+  // compared to the size and offset of the accesses, we may be able to prove
+  // they do not alias.  This check is conservative for now to catch cases
+  // created by splitting vector types.
+  if ((SrcValueAlign1 == SrcValueAlign2) &&
+      (SrcValueOffset1 != SrcValueOffset2) &&
+      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+    
+    // There is no overlap between these relatively aligned accesses of
+    // similar size; return no alias.
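+    // For example (illustrative): two 4-byte accesses whose source values are
+    // 16-byte aligned, at offsets 16 and 20, give OffAlign1 == 0 and
+    // OffAlign2 == 4, so OffAlign1 + Size1 <= OffAlign2 proves they are
+    // disjoint.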
+    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+      return false;
+  }
+  
+  if (CombinerGlobalAA) {
+    // Use alias analysis information.
+    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+    AliasAnalysis::AliasResult AAResult =
+                             AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+    if (AAResult == AliasAnalysis::NoAlias)
+      return false;
+  }
+
+  // Otherwise we have to assume they alias.
+  return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node.  Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+                                SDValue &Ptr, int64_t &Size,
+                                const Value *&SrcValue,
+                                int &SrcValueOffset,
+                                unsigned &SrcValueAlign) const {
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    Ptr = LD->getBasePtr();
+    Size = LD->getMemoryVT().getSizeInBits() >> 3;
+    SrcValue = LD->getSrcValue();
+    SrcValueOffset = LD->getSrcValueOffset();
+    SrcValueAlign = LD->getOriginalAlignment();
+    return true;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    Ptr = ST->getBasePtr();
+    Size = ST->getMemoryVT().getSizeInBits() >> 3;
+    SrcValue = ST->getSrcValue();
+    SrcValueOffset = ST->getSrcValueOffset();
+    SrcValueAlign = ST->getOriginalAlignment();
+  } else {
+    llvm_unreachable("FindAliasInfo expected a memory operand");
+  }
+
+  return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+                                   SmallVector<SDValue, 8> &Aliases) {
+  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
+  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
+
+  // Get alias information for node.
+  SDValue Ptr;
+  int64_t Size;
+  const Value *SrcValue;
+  int SrcValueOffset;
+  unsigned SrcValueAlign;
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, 
+                              SrcValueAlign);
+
+  // Starting off.
+  Chains.push_back(OriginalChain);
+  unsigned Depth = 0;
+  
+  // Look at each chain and determine if it is an alias.  If so, add it to the
+  // aliases list.  If not, then continue up the chain looking for the next
+  // candidate.
+  while (!Chains.empty()) {
+    SDValue Chain = Chains.back();
+    Chains.pop_back();
+    
+    // For TokenFactor nodes, look at each operand and only continue up the 
+    // chain until we find two aliases.  If we've seen two aliases, assume we'll 
+    // find more and revert to original chain since the xform is unlikely to be
+    // profitable.
+    // 
+    // FIXME: The depth check could be made to return the last non-aliasing 
+    // chain we found before we hit a tokenfactor rather than the original
+    // chain.
+    if (Depth > 6 || Aliases.size() == 2) {
+      Aliases.clear();
+      Aliases.push_back(OriginalChain);
+      break;
+    }
+
+    // Don't bother if we've been here before.
+    if (!Visited.insert(Chain.getNode()))
+      continue;
+
+    switch (Chain.getOpcode()) {
+    case ISD::EntryToken:
+      // Entry token is ideal chain operand, but handled in FindBetterChain.
+      break;
+
+    case ISD::LOAD:
+    case ISD::STORE: {
+      // Get alias information for Chain.
+      SDValue OpPtr;
+      int64_t OpSize;
+      const Value *OpSrcValue;
+      int OpSrcValueOffset;
+      unsigned OpSrcValueAlign;
+      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+                                    OpSrcValue, OpSrcValueOffset,
+                                    OpSrcValueAlign);
+
+      // If the chain is an alias then stop here.
+      if (!(IsLoad && IsOpLoad) &&
+          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+                  OpSrcValueAlign)) {
+        Aliases.push_back(Chain);
+      } else {
+        // Look further up the chain.
+        Chains.push_back(Chain.getOperand(0));
+        ++Depth;
+      }
+      break;
+    }
+
+    case ISD::TokenFactor:
+      // We have to check each of the operands of the token factor for "small"
+      // token factors, so we queue them up.  Adding the operands to the queue
+      // (stack) in reverse order maintains the original order and increases the
+      // likelihood that getNode will find a matching token factor (CSE).
+      if (Chain.getNumOperands() > 16) {
+        Aliases.push_back(Chain);
+        break;
+      }
+      for (unsigned n = Chain.getNumOperands(); n;)
+        Chains.push_back(Chain.getOperand(--n));
+      ++Depth;
+      break;
+
+    default:
+      // For all other instructions we will just have to take what we can get.
+      Aliases.push_back(Chain);
+      break;
+    }
+  }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node).
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
+
+  // Accumulate all the aliases to this node.
+  GatherAllAliases(N, OldChain, Aliases);
+
+  if (Aliases.size() == 0) {
+    // If no operands then chain to entry token.
+    return DAG.getEntryNode();
+  } else if (Aliases.size() == 1) {
+    // If a single operand then chain to it.  We don't need to revisit it.
+    return Aliases[0];
+  }
+  
+  // Construct a custom tailored token factor.
+  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, 
+                     &Aliases[0], Aliases.size());
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+                           CodeGenOpt::Level OptLevel) {
+  // Create a DAGCombiner instance and run it over the DAG.
+  DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 0000000..35ef5b7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,992 @@
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported.  It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time.  For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support.  In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated.  Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time.  Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators.  More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
+using namespace llvm;
+
+unsigned FastISel::getRegForValue(Value *V) {
+  EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+  // Don't handle non-simple values in FastISel.
+  if (!RealVT.isSimple())
+    return 0;
+
+  // Ignore illegal types. We must do this before looking up the value
+  // in ValueMap because Arguments are given virtual registers regardless
+  // of whether FastISel can handle them.
+  MVT VT = RealVT.getSimpleVT();
+  if (!TLI.isTypeLegal(VT)) {
+    // Promote MVT::i1 to a legal type though, because it's common and easy.
+    if (VT == MVT::i1)
+      VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+    else
+      return 0;
+  }
+
+  // Look up the value to see if we already have a register for it. We
+  // cache values defined by Instructions across blocks, and other values
+  // only locally. This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+  if (ValueMap.count(V))
+    return ValueMap[V];
+  unsigned Reg = LocalValueMap[V];
+  if (Reg != 0)
+    return Reg;
+
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    if (CI->getValue().getActiveBits() <= 64)
+      Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+  } else if (isa<AllocaInst>(V)) {
+    Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+  } else if (isa<ConstantPointerNull>(V)) {
+    // Translate this as an integer zero so that it can be
+    // local-CSE'd with actual integer zeros.
+    Reg =
+      getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+    Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+    if (!Reg) {
+      const APFloat &Flt = CF->getValueAPF();
+      EVT IntVT = TLI.getPointerTy();
+
+      uint64_t x[2];
+      uint32_t IntBitWidth = IntVT.getSizeInBits();
+      bool isExact;
+      (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+                                APFloat::rmTowardZero, &isExact);
+      if (isExact) {
+        APInt IntVal(IntBitWidth, 2, x);
+
+        unsigned IntegerReg =
+          getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+        if (IntegerReg != 0)
+          Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
+      }
+    }
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (!SelectOperator(CE, CE->getOpcode())) return 0;
+    Reg = LocalValueMap[CE];
+  } else if (isa<UndefValue>(V)) {
+    Reg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+  }
+  
+  // If target-independent code couldn't handle the value, give target-specific
+  // code a try.
+  if (!Reg && isa<Constant>(V))
+    Reg = TargetMaterializeConstant(cast<Constant>(V));
+  
+  // Don't cache constant materializations in the general ValueMap.
+  // To do so would require tracking what uses they dominate.
+  if (Reg != 0)
+    LocalValueMap[V] = Reg;
+  return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(Value *V) {
+  // Look up the value to see if we already have a register for it. We
+  // cache values defined by Instructions across blocks, and other values
+  // only locally. This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+  if (ValueMap.count(V))
+    return ValueMap[V];
+  return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value.  It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+unsigned FastISel::UpdateValueMap(Value *I, unsigned Reg) {
+  if (!isa<Instruction>(I)) {
+    LocalValueMap[I] = Reg;
+    return Reg;
+  }
+  
+  unsigned &AssignedReg = ValueMap[I];
+  if (AssignedReg == 0)
+    AssignedReg = Reg;
+  else if (Reg != AssignedReg) {
+    const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
+    TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
+                     Reg, RegClass, RegClass);
+  }
+  return AssignedReg;
+}
+
+unsigned FastISel::getRegForGEPIndex(Value *Idx) {
+  unsigned IdxN = getRegForValue(Idx);
+  if (IdxN == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return 0;
+
+  // If the index is smaller or larger than intptr_t, truncate or extend it.
+  MVT PtrVT = TLI.getPointerTy();
+  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+  if (IdxVT.bitsLT(PtrVT))
+    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
+  else if (IdxVT.bitsGT(PtrVT))
+    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN);
+  return IdxN;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// whose opcode directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) {
+  EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+  if (VT == MVT::Other || !VT.isSimple())
+    // Unhandled type. Halt "fast" selection and bail.
+    return false;
+
+  // We only handle legal types. For example, on x86-32 the instruction
+  // selector contains all of the 64-bit instructions from x86-64,
+  // under the assumption that i64 won't be used if the target doesn't
+  // support it.
+  if (!TLI.isTypeLegal(VT)) {
+    // MVT::i1 is special. Allow AND, OR, or XOR because they
+    // don't require additional zeroing, which makes them easy.
+    if (VT == MVT::i1 &&
+        (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+         ISDOpcode == ISD::XOR))
+      VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+    else
+      return false;
+  }
+
+  unsigned Op0 = getRegForValue(I->getOperand(0));
+  if (Op0 == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+
+  // Check if the second operand is a constant and handle it appropriately.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+    unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
+                                     ISDOpcode, Op0, CI->getZExtValue());
+    if (ResultReg != 0) {
+      // We successfully emitted code for the given LLVM Instruction.
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+  }
+
+  // Check if the second operand is a constant float.
+  if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+    unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+                                     ISDOpcode, Op0, CF);
+    if (ResultReg != 0) {
+      // We successfully emitted code for the given LLVM Instruction.
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+  }
+
+  unsigned Op1 = getRegForValue(I->getOperand(1));
+  if (Op1 == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+
+  // Now we have both operands in registers. Emit the instruction.
+  unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+                                   ISDOpcode, Op0, Op1);
+  if (ResultReg == 0)
+    // Target-specific code wasn't able to find a machine opcode for
+    // the given ISD opcode and type. Halt "fast" selection and bail.
+    return false;
+
+  // We successfully emitted code for the given LLVM Instruction.
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool FastISel::SelectGetElementPtr(User *I) {
+  unsigned N = getRegForValue(I->getOperand(0));
+  if (N == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+
+  const Type *Ty = I->getOperand(0)->getType();
+  MVT VT = TLI.getPointerTy();
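+  // For example (illustrative): for "getelementptr i32* %p, i32 %i" on a
+  // 32-bit target, the loop below emits roughly
+  //   IdxN = getRegForGEPIndex(%i);   // index widened to the pointer size
+  //   Tmp  = mul IdxN, 4;             // 4 == TD.getTypeAllocSize(i32)
+  //   N    = add N, Tmp;
+  // while constant indices are folded directly into an add of the offset.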
+  for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end();
+       OI != E; ++OI) {
+    Value *Idx = *OI;
+    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      if (Field) {
+        // N = N + Offset
+        uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
+        // FIXME: This can be optimized by combining the add with a
+        // subsequent one.
+        N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+        if (N == 0)
+          // Unhandled operand. Halt "fast" selection and bail.
+          return false;
+      }
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+
+      // If this is a constant subscript, handle it quickly.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->getZExtValue() == 0) continue;
+        uint64_t Offs =
+          TD.getTypeAllocSize(Ty) * CI->getSExtValue();
+        N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+        if (N == 0)
+          // Unhandled operand. Halt "fast" selection and bail.
+          return false;
+        continue;
+      }
+      
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+      unsigned IdxN = getRegForGEPIndex(Idx);
+      if (IdxN == 0)
+        // Unhandled operand. Halt "fast" selection and bail.
+        return false;
+
+      if (ElementSize != 1) {
+        IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT);
+        if (IdxN == 0)
+          // Unhandled operand. Halt "fast" selection and bail.
+          return false;
+      }
+      N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
+      if (N == 0)
+        // Unhandled operand. Halt "fast" selection and bail.
+        return false;
+    }
+  }
+
+  // We successfully emitted code for the given LLVM Instruction.
+  UpdateValueMap(I, N);
+  return true;
+}
+
+bool FastISel::SelectCall(User *I) {
+  Function *F = cast<CallInst>(I)->getCalledFunction();
+  if (!F) return false;
+
+  unsigned IID = F->getIntrinsicID();
+  switch (IID) {
+  default: break;
+  case Intrinsic::dbg_declare: {
+    DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+    if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) ||
+        !DW || !DW->ShouldEmitDwarfDebug())
+      return true;
+
+    Value *Address = DI->getAddress();
+    if (!Address)
+      return true;
+    AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+    // Don't handle byval struct arguments or VLAs, for example.
+    if (!AI) break;
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      StaticAllocaMap.find(AI);
+    if (SI == StaticAllocaMap.end()) break; // VLAs.
+    int FI = SI->second;
+    if (MMI) {
+      if (MDNode *Dbg = DI->getMetadata("dbg"))
+        MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg);
+    }
+    // Building the map above is target independent.  Generating DBG_VALUE
+    // inline is target dependent; do this now.
+    (void)TargetSelectInstruction(cast<Instruction>(I));
+    return true;
+  }
+  case Intrinsic::eh_exception: {
+    EVT VT = TLI.getValueType(I->getType());
+    switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
+    default: break;
+    case TargetLowering::Expand: {
+      assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!");
+      unsigned Reg = TLI.getExceptionAddressRegister();
+      const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+      unsigned ResultReg = createResultReg(RC);
+      bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                           Reg, RC, RC);
+      assert(InsertedCopy && "Can't copy address registers!");
+      InsertedCopy = InsertedCopy; // Silence compiler warning.
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+    }
+    break;
+  }
+  case Intrinsic::eh_selector: {
+    EVT VT = TLI.getValueType(I->getType());
+    switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
+    default: break;
+    case TargetLowering::Expand: {
+      if (MMI) {
+        if (MBB->isLandingPad())
+          AddCatchInfo(*cast<CallInst>(I), MMI, MBB);
+        else {
+#ifndef NDEBUG
+          CatchInfoLost.insert(cast<CallInst>(I));
+#endif
+          // FIXME: Mark exception selector register as live in.  Hack for PR1508.
+          unsigned Reg = TLI.getExceptionSelectorRegister();
+          if (Reg) MBB->addLiveIn(Reg);
+        }
+
+        unsigned Reg = TLI.getExceptionSelectorRegister();
+        EVT SrcVT = TLI.getPointerTy();
+        const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+        unsigned ResultReg = createResultReg(RC);
+        bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
+                                             RC, RC);
+        assert(InsertedCopy && "Can't copy address registers!");
+        InsertedCopy = InsertedCopy; // Silence compiler warning.
+
+        // Cast the register to the type of the selector.
+        if (SrcVT.bitsGT(MVT::i32))
+          ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+                                 ResultReg);
+        else if (SrcVT.bitsLT(MVT::i32))
+          ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+                                 ISD::SIGN_EXTEND, ResultReg);
+        if (ResultReg == 0)
+          // Unhandled operand. Halt "fast" selection and bail.
+          return false;
+
+        UpdateValueMap(I, ResultReg);
+      } else {
+        unsigned ResultReg =
+          getRegForValue(Constant::getNullValue(I->getType()));
+        UpdateValueMap(I, ResultReg);
+      }
+      return true;
+    }
+    }
+    break;
+  }
+  }
+  return false;
+}
+
+bool FastISel::SelectCast(User *I, unsigned Opcode) {
+  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+  EVT DstVT = TLI.getValueType(I->getType());
+    
+  if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+      DstVT == MVT::Other || !DstVT.isSimple())
+    // Unhandled type. Halt "fast" selection and bail.
+    return false;
+    
+  // Check if the destination type is legal. Or as a special case,
+  // it may be i1 if we're doing a truncate because that's
+  // easy and somewhat common.
+  if (!TLI.isTypeLegal(DstVT))
+    if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE)
+      // Unhandled type. Halt "fast" selection and bail.
+      return false;
+
+  // Check if the source operand is legal. Or as a special case,
+  // it may be i1 if we're doing zero-extension because that's
+  // easy and somewhat common.
+  if (!TLI.isTypeLegal(SrcVT))
+    if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND)
+      // Unhandled type. Halt "fast" selection and bail.
+      return false;
+
+  unsigned InputReg = getRegForValue(I->getOperand(0));
+  if (!InputReg)
+    // Unhandled operand.  Halt "fast" selection and bail.
+    return false;
+
+  // If the operand is i1, arrange for the high bits in the register to be zero.
+  if (SrcVT == MVT::i1) {
+    SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
+    InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg);
+    if (!InputReg)
+      return false;
+  }
+  // If the result is i1, truncate to the target's type for i1 first.
+  if (DstVT == MVT::i1)
+    DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT);
+
+  unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+                                  DstVT.getSimpleVT(),
+                                  Opcode,
+                                  InputReg);
+  if (!ResultReg)
+    return false;
+    
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool FastISel::SelectBitCast(User *I) {
+  // If the bitcast doesn't change the type, just use the operand value.
+  if (I->getType() == I->getOperand(0)->getType()) {
+    unsigned Reg = getRegForValue(I->getOperand(0));
+    if (Reg == 0)
+      return false;
+    UpdateValueMap(I, Reg);
+    return true;
+  }
+
+  // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
+  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+  EVT DstVT = TLI.getValueType(I->getType());
+  
+  if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+      DstVT == MVT::Other || !DstVT.isSimple() ||
+      !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+    // Unhandled type. Halt "fast" selection and bail.
+    return false;
+  
+  unsigned Op0 = getRegForValue(I->getOperand(0));
+  if (Op0 == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+  
+  // First, try to perform the bitcast by inserting a reg-reg copy.
+  unsigned ResultReg = 0;
+  if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+    const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
+    const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
+    ResultReg = createResultReg(DstClass);
+    
+    bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                         Op0, DstClass, SrcClass);
+    if (!InsertedCopy)
+      ResultReg = 0;
+  }
+  
+  // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+  if (!ResultReg)
+    ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+                           ISD::BIT_CONVERT, Op0);
+  
+  if (!ResultReg)
+    return false;
+  
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool
+FastISel::SelectInstruction(Instruction *I) {
+  // First, try doing target-independent selection.
+  if (SelectOperator(I, I->getOpcode()))
+    return true;
+
+  // Next, try calling the target to attempt to handle the instruction.
+  if (TargetSelectInstruction(I))
+    return true;
+
+  return false;
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
+  if (MBB->isLayoutSuccessor(MSucc)) {
+    // The unconditional fall-through case, which needs no instructions.
+  } else {
+    // The unconditional branch case.
+    TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
+  }
+  MBB->addSuccessor(MSucc);
+}
+
+/// SelectFNeg - Emit an FNeg operation.
+///
+bool
+FastISel::SelectFNeg(User *I) {
+  unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+  if (OpReg == 0) return false;
+
+  // If the target has ISD::FNEG, use it.
+  EVT VT = TLI.getValueType(I->getType());
+  unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
+                                  ISD::FNEG, OpReg);
+  if (ResultReg != 0) {
+    UpdateValueMap(I, ResultReg);
+    return true;
+  }
+
+  // Bitcast the value to integer, twiddle the sign bit with xor,
+  // and then bitcast it back to floating-point.
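+  // For example, a 64-bit double is bitcast to i64 and XORed with
+  // UINT64_C(1) << 63 (0x8000000000000000), which flips only the sign bit.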
+  if (VT.getSizeInBits() > 64) return false;
+  EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+  if (!TLI.isTypeLegal(IntVT))
+    return false;
+
+  unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+                               ISD::BIT_CONVERT, OpReg);
+  if (IntReg == 0)
+    return false;
+
+  unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg,
+                                       UINT64_C(1) << (VT.getSizeInBits()-1),
+                                       IntVT.getSimpleVT());
+  if (IntResultReg == 0)
+    return false;
+
+  ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
+                         ISD::BIT_CONVERT, IntResultReg);
+  if (ResultReg == 0)
+    return false;
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool
+FastISel::SelectOperator(User *I, unsigned Opcode) {
+  switch (Opcode) {
+  case Instruction::Add:
+    return SelectBinaryOp(I, ISD::ADD);
+  case Instruction::FAdd:
+    return SelectBinaryOp(I, ISD::FADD);
+  case Instruction::Sub:
+    return SelectBinaryOp(I, ISD::SUB);
+  case Instruction::FSub:
+    // FNeg is currently represented in LLVM IR as a special case of FSub.
+    if (BinaryOperator::isFNeg(I))
+      return SelectFNeg(I);
+    return SelectBinaryOp(I, ISD::FSUB);
+  case Instruction::Mul:
+    return SelectBinaryOp(I, ISD::MUL);
+  case Instruction::FMul:
+    return SelectBinaryOp(I, ISD::FMUL);
+  case Instruction::SDiv:
+    return SelectBinaryOp(I, ISD::SDIV);
+  case Instruction::UDiv:
+    return SelectBinaryOp(I, ISD::UDIV);
+  case Instruction::FDiv:
+    return SelectBinaryOp(I, ISD::FDIV);
+  case Instruction::SRem:
+    return SelectBinaryOp(I, ISD::SREM);
+  case Instruction::URem:
+    return SelectBinaryOp(I, ISD::UREM);
+  case Instruction::FRem:
+    return SelectBinaryOp(I, ISD::FREM);
+  case Instruction::Shl:
+    return SelectBinaryOp(I, ISD::SHL);
+  case Instruction::LShr:
+    return SelectBinaryOp(I, ISD::SRL);
+  case Instruction::AShr:
+    return SelectBinaryOp(I, ISD::SRA);
+  case Instruction::And:
+    return SelectBinaryOp(I, ISD::AND);
+  case Instruction::Or:
+    return SelectBinaryOp(I, ISD::OR);
+  case Instruction::Xor:
+    return SelectBinaryOp(I, ISD::XOR);
+
+  case Instruction::GetElementPtr:
+    return SelectGetElementPtr(I);
+
+  case Instruction::Br: {
+    BranchInst *BI = cast<BranchInst>(I);
+
+    if (BI->isUnconditional()) {
+      BasicBlock *LLVMSucc = BI->getSuccessor(0);
+      MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
+      FastEmitBranch(MSucc);
+      return true;
+    }
+
+    // Conditional branches are not handled yet.
+    // Halt "fast" selection and bail.
+    return false;
+  }
+
+  case Instruction::Unreachable:
+    // Nothing to emit.
+    return true;
+
+  case Instruction::PHI:
+    // PHI nodes are already emitted.
+    return true;
+
+  case Instruction::Alloca:
+    // FunctionLowering has the static-sized case covered.
+    if (StaticAllocaMap.count(cast<AllocaInst>(I)))
+      return true;
+
+    // Dynamic-sized alloca is not handled yet.
+    return false;
+    
+  case Instruction::Call:
+    return SelectCall(I);
+  
+  case Instruction::BitCast:
+    return SelectBitCast(I);
+
+  case Instruction::FPToSI:
+    return SelectCast(I, ISD::FP_TO_SINT);
+  case Instruction::ZExt:
+    return SelectCast(I, ISD::ZERO_EXTEND);
+  case Instruction::SExt:
+    return SelectCast(I, ISD::SIGN_EXTEND);
+  case Instruction::Trunc:
+    return SelectCast(I, ISD::TRUNCATE);
+  case Instruction::SIToFP:
+    return SelectCast(I, ISD::SINT_TO_FP);
+
+  case Instruction::IntToPtr: // Deliberate fall-through.
+  case Instruction::PtrToInt: {
+    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+    EVT DstVT = TLI.getValueType(I->getType());
+    if (DstVT.bitsGT(SrcVT))
+      return SelectCast(I, ISD::ZERO_EXTEND);
+    if (DstVT.bitsLT(SrcVT))
+      return SelectCast(I, ISD::TRUNCATE);
+    unsigned Reg = getRegForValue(I->getOperand(0));
+    if (Reg == 0) return false;
+    UpdateValueMap(I, Reg);
+    return true;
+  }
+
+  default:
+    // Unhandled instruction. Halt "fast" selection and bail.
+    return false;
+  }
+}
+
+FastISel::FastISel(MachineFunction &mf,
+                   MachineModuleInfo *mmi,
+                   DwarfWriter *dw,
+                   DenseMap<const Value *, unsigned> &vm,
+                   DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
+                   DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+                   , SmallSet<Instruction*, 8> &cil
+#endif
+                   )
+  : MBB(0),
+    ValueMap(vm),
+    MBBMap(bm),
+    StaticAllocaMap(am),
+#ifndef NDEBUG
+    CatchInfoLost(cil),
+#endif
+    MF(mf),
+    MMI(mmi),
+    DW(dw),
+    MRI(MF.getRegInfo()),
+    MFI(*MF.getFrameInfo()),
+    MCP(*MF.getConstantPool()),
+    TM(MF.getTarget()),
+    TD(*TM.getTargetData()),
+    TII(*TM.getInstrInfo()),
+    TLI(*TM.getTargetLowering()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+                             unsigned) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+                              unsigned, unsigned /*Op0*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT, 
+                               unsigned, unsigned /*Op0*/,
+                               unsigned /*Op1*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+                              unsigned, ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+                               unsigned, unsigned /*Op0*/,
+                               uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+                               unsigned, unsigned /*Op0*/,
+                               ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+                                unsigned,
+                                unsigned /*Op0*/, unsigned /*Op1*/,
+                                uint64_t /*Imm*/) {
+  return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper around FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
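+///
+/// For example (illustrative only), if the target provides no
+/// shift-by-immediate instruction, a request like (SHL r1, 5) falls back to
+/// materializing 5 into a register via FastEmit_i and emitting the
+/// register-register form; the opcodes actually available depend on the
+/// target's FastEmit_* overrides.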
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+                                unsigned Op0, uint64_t Imm,
+                                MVT ImmType) {
+  // First try emitting the instruction with the immediate directly; the
+  // target returns 0 if it has no suitable "ri" form.
+  unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
+  if (ResultReg != 0)
+    return ResultReg;
+  unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+  if (MaterialReg == 0)
+    return 0;
+  return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
+
+/// FastEmit_rf_ - This method is a wrapper around FastEmit_rf. It first tries
+/// to emit an instruction with a floating-point immediate operand using
+/// FastEmit_rf. If that fails, it materializes the immediate into a register
+/// and tries FastEmit_rr instead.
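+///
+/// For example (illustrative only), a target without a float-immediate form
+/// of FADD would have 2.0 materialized into a register first (via FastEmit_f,
+/// or failing that via an integer constant plus SINT_TO_FP, which works
+/// because 2.0 converts to the integer 2 exactly) and then gets the rr form.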
+unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
+                                unsigned Op0, ConstantFP *FPImm,
+                                MVT ImmType) {
+  // First try emitting the instruction with the floating-point immediate
+  // directly; the target returns 0 if it has no suitable "rf" form.
+  unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
+  if (ResultReg != 0)
+    return ResultReg;
+
+  // Materialize the constant in a register.
+  unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
+  if (MaterialReg == 0) {
+    // If the target doesn't have a way to directly enter a floating-point
+    // value into a register, use an alternate approach.
+    // TODO: The current approach only supports floating-point constants
+    // that can be constructed by conversion from integer values. This should
+    // be replaced by code that creates a load from a constant-pool entry,
+    // which will require some target-specific work.
+    const APFloat &Flt = FPImm->getValueAPF();
+    EVT IntVT = TLI.getPointerTy();
+
+    uint64_t x[2];
+    uint32_t IntBitWidth = IntVT.getSizeInBits();
+    bool isExact;
+    (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+                                APFloat::rmTowardZero, &isExact);
+    if (!isExact)
+      return 0;
+    APInt IntVal(IntBitWidth, 2, x);
+
+    unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(),
+                                     ISD::Constant, IntVal.getZExtValue());
+    if (IntegerReg == 0)
+      return 0;
+    MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
+                             ISD::SINT_TO_FP, IntegerReg);
+    if (MaterialReg == 0)
+      return 0;
+  }
+  return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+  return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+                                 const TargetRegisterClass* RC) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  BuildMI(MBB, DL, II, ResultReg);
+  return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+                                  const TargetRegisterClass *RC,
+                                  unsigned Op0) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(MBB, DL, II, ResultReg).addReg(Op0);
+  else {
+    BuildMI(MBB, DL, II).addReg(Op0);
+    bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                         II.ImplicitDefs[0], RC, RC);
+    if (!InsertedCopy)
+      ResultReg = 0;
+  }
+
+  return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+                                   const TargetRegisterClass *RC,
+                                   unsigned Op0, unsigned Op1) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1);
+  else {
+    BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1);
+    bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                         II.ImplicitDefs[0], RC, RC);
+    if (!InsertedCopy)
+      ResultReg = 0;
+  }
+  return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+                                   const TargetRegisterClass *RC,
+                                   unsigned Op0, uint64_t Imm) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm);
+  else {
+    BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm);
+    bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                         II.ImplicitDefs[0], RC, RC);
+    if (!InsertedCopy)
+      ResultReg = 0;
+  }
+  return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+                                   const TargetRegisterClass *RC,
+                                   unsigned Op0, ConstantFP *FPImm) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm);
+  else {
+    BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm);
+    bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                         II.ImplicitDefs[0], RC, RC);
+    if (!InsertedCopy)
+      ResultReg = 0;
+  }
+  return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    unsigned Op0, unsigned Op1, uint64_t Imm) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm);
+  else {
+    BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm);
+    bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                         II.ImplicitDefs[0], RC, RC);
+    if (!InsertedCopy)
+      ResultReg = 0;
+  }
+  return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+                                  const TargetRegisterClass *RC,
+                                  uint64_t Imm) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  
+  if (II.getNumDefs() >= 1)
+    BuildMI(MBB, DL, II, ResultReg).addImm(Imm);
+  else {
+    BuildMI(MBB, DL, II).addImm(Imm);
+    bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                         II.ImplicitDefs[0], RC, RC);
+    if (!InsertedCopy)
+      ResultReg = 0;
+  }
+  return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
+                                              unsigned Op0, uint32_t Idx) {
+  const TargetRegisterClass* RC = MRI.getRegClass(Op0);
+  
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+  const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG);
+  
+  if (II.getNumDefs() >= 1)
+    BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx);
+  else {
+    BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx);
+    bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                         II.ImplicitDefs[0], RC, RC);
+    if (!InsertedCopy)
+      ResultReg = 0;
+  }
+  return ResultReg;
+}
+
+/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+/// with all but the least significant bit set to zero.
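+/// For example, an i8 register holding 0xFF yields 0x01 and one holding 0xFE
+/// yields 0x00; only bit 0 survives the AND with 1.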
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) {
+  return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
+}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 0000000..50f4c32
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,354 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "function-lowering-info"
+#include "FunctionLoweringInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
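+/// For example (illustrative), in the aggregate { i32, { float, double }, i8 }
+/// the index sequence {1, 1} names the double member: the leading i32 takes
+/// one flattened slot and the inner float takes another, so the linearized
+/// index is 2.
+///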
+unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+                                  const unsigned *Indices,
+                                  const unsigned *IndicesEnd,
+                                  unsigned CurIndex) {
+  // Base case: We're done.
+  if (Indices && Indices == IndicesEnd)
+    return CurIndex;
+
+  // Given a struct type, recursively traverse the elements.
+  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (StructType::element_iterator EB = STy->element_begin(),
+                                      EI = EB,
+                                      EE = STy->element_end();
+        EI != EE; ++EI) {
+      if (Indices && *Indices == unsigned(EI - EB))
+        return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // Given an array type, recursively traverse the elements.
+  else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    const Type *EltTy = ATy->getElementType();
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+      if (Indices && *Indices == i)
+        return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // We haven't found the type we're looking for, so keep searching.
+  return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
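+/// For example (illustrative), { i32, [2 x float] } yields the sequence
+/// { i32, f32, f32 }; with a typical layout the corresponding offsets are
+/// 0, 4, and 8 bytes.
+///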
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+                           SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<uint64_t> *Offsets,
+                           uint64_t StartingOffset) {
+  // Given a struct type, recursively traverse the elements.
+  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+    for (StructType::element_iterator EB = STy->element_begin(),
+                                      EI = EB,
+                                      EE = STy->element_end();
+         EI != EE; ++EI)
+      ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+                      StartingOffset + SL->getElementOffset(EI - EB));
+    return;
+  }
+  // Given an array type, recursively traverse the elements.
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    const Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+      ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+                      StartingOffset + i * EltSize);
+    return;
+  }
+  // Interpret void as zero return values.
+  if (Ty->isVoidTy())
+    return;
+  // Base case: we can get an EVT for this LLVM IR type.
+  ValueVTs.push_back(TLI.getValueType(Ty));
+  if (Offsets)
+    Offsets->push_back(StartingOffset);
+}
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it; such values must
+/// be available in virtual registers rather than as purely local values.
+static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+  if (isa<PHINode>(I)) return true;
+  BasicBlock *BB = I->getParent();
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+    if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
+      return true;
+  return false;
+}
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true.  Uses by switches don't qualify, since a switch
+/// may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
+  // With FastISel active, we may be splitting blocks, so force creation
+  // of virtual registers for all non-dead arguments.
+  // Don't force virtual registers for byval arguments though, because
+  // fast-isel can't handle those in all cases.
+  if (EnableFastISel && !A->hasByValAttr())
+    return A->use_empty();
+
+  BasicBlock *Entry = A->getParent()->begin();
+  for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+    if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+      return false;  // Use not in entry block.
+  return true;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
+  : TLI(tli) {
+}
+
+void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
+                               bool EnableFastISel) {
+  Fn = &fn;
+  MF = &mf;
+  RegInfo = &MF->getRegInfo();
+
+  // Create a vreg for each argument register that is not dead and is used
+  // outside of the entry block for the function.
+  for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
+       AI != E; ++AI)
+    if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
+      InitializeRegForValue(AI);
+
+  // Initialize the mapping of values to registers.  This is only set up for
+  // instruction values that are used outside of the block that defines
+  // them.
+  Function::iterator BB = Fn->begin(), EB = Fn->end();
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+      if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+        const Type *Ty = AI->getAllocatedType();
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+        unsigned Align =
+          std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+                   AI->getAlignment());
+
+        TySize *= CUI->getZExtValue();   // Get total allocated size.
+        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+        StaticAllocaMap[AI] =
+          MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
+      }
+
+  for (; BB != EB; ++BB)
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+        if (!isa<AllocaInst>(I) ||
+            !StaticAllocaMap.count(cast<AllocaInst>(I)))
+          InitializeRegForValue(I);
+
+  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
+  // also creates the initial PHI MachineInstrs, though none of the input
+  // operands are populated.
+  for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
+    MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+    MBBMap[BB] = MBB;
+    MF->push_back(MBB);
+
+    // Transfer the address-taken flag. This is necessary because there could
+    // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+    // the first one should be marked.
+    if (BB->hasAddressTaken())
+      MBB->setHasAddressTaken();
+
+    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+    // appropriate.
+    PHINode *PN;
+    DebugLoc DL;
+    for (BasicBlock::iterator
+           I = BB->begin(), E = BB->end(); I != E; ++I) {
+
+      PN = dyn_cast<PHINode>(I);
+      if (!PN || PN->use_empty()) continue;
+
+      unsigned PHIReg = ValueMap[PN];
+      assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        EVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+        const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+        for (unsigned i = 0; i != NumRegisters; ++i)
+          BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+        PHIReg += NumRegisters;
+      }
+    }
+  }
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+  MBBMap.clear();
+  ValueMap.clear();
+  StaticAllocaMap.clear();
+#ifndef NDEBUG
+  CatchInfoLost.clear();
+  CatchInfoFound.clear();
+#endif
+  LiveOutRegInfo.clear();
+}
+
+unsigned FunctionLoweringInfo::MakeReg(EVT VT) {
+  return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types.  Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
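+/// For example (illustrative), on a 32-bit target where i64 is expanded into
+/// two i32 registers, a value of type { i64, float } is assigned three
+/// consecutive vregs: two for the i64 member and one for the float.
+///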
+unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, V->getType(), ValueVTs);
+
+  unsigned FirstReg = 0;
+  for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    EVT ValueVT = ValueVTs[Value];
+    EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT);
+
+    unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      unsigned R = MakeReg(RegisterVT);
+      if (!FirstReg) FirstReg = R;
+    }
+  }
+  return FirstReg;
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+  V = V->stripPointerCasts();
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+  assert((GV || isa<ConstantPointerNull>(V)) &&
+         "TypeInfo must be a global variable or NULL");
+  return GV;
+}
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
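+///
+/// As decoded below (illustrative), operand 2 is the personality function and
+/// the trailing operands are scanned backwards: a ConstantInt of 0 marks a
+/// cleanup, a ConstantInt k > 0 introduces a filter of k-1 typeinfos, and any
+/// operands not claimed by a filter are catch typeinfos.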
+void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+                        MachineBasicBlock *MBB) {
+  // Inform the MachineModuleInfo of the personality for this landing pad.
+  ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+  assert(CE->getOpcode() == Instruction::BitCast &&
+         isa<Function>(CE->getOperand(0)) &&
+         "Personality should be a function");
+  MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+  // Gather all the type infos for this landing pad and pass them along to
+  // MachineModuleInfo.
+  std::vector<GlobalVariable *> TyInfo;
+  unsigned N = I.getNumOperands();
+
+  for (unsigned i = N - 1; i > 2; --i) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+      unsigned FilterLength = CI->getZExtValue();
+      unsigned FirstCatch = i + FilterLength + !FilterLength;
+      assert(FirstCatch <= N && "Invalid filter length");
+
+      if (FirstCatch < N) {
+        TyInfo.reserve(N - FirstCatch);
+        for (unsigned j = FirstCatch; j < N; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+        MMI->addCatchTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      if (!FilterLength) {
+        // Cleanup.
+        MMI->addCleanup(MBB);
+      } else {
+        // Filter.
+        TyInfo.reserve(FilterLength - 1);
+        for (unsigned j = i + 1; j < FirstCatch; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+        MMI->addFilterTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      N = i;
+    }
+  }
+
+  if (N > 3) {
+    TyInfo.reserve(N - 3);
+    for (unsigned j = 3; j < N; ++j)
+      TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+    MMI->addCatchTypeInfo(MBB, TyInfo);
+  }
+}
+
+void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+                         MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+  for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
+    if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+      // Apply the catch info to DestBB.
+      AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
+#ifndef NDEBUG
+      if (!FLI.MBBMap[SrcBB]->isLandingPad())
+        FLI.CatchInfoFound.insert(EHSel);
+#endif
+    }
+}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
new file mode 100644
index 0000000..d851e64
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
@@ -0,0 +1,151 @@
+//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FUNCTIONLOWERINGINFO_H
+#define FUNCTIONLOWERINGINFO_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/ValueTypes.h"
+#include <vector>
+
+namespace llvm {
+
+class AllocaInst;
+class BasicBlock;
+class CallInst;
+class Function;
+class GlobalVariable;
+class Instruction;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class TargetLowering;
+class Value;
+
+//===--------------------------------------------------------------------===//
+/// FunctionLoweringInfo - This contains information that is global to a
+/// function that is used when lowering a region of the function.
+///
+class FunctionLoweringInfo {
+public:
+  TargetLowering &TLI;
+  Function *Fn;
+  MachineFunction *MF;
+  MachineRegisterInfo *RegInfo;
+
+  /// CanLowerReturn - true iff the function's return value can be lowered to
+  /// registers.
+  bool CanLowerReturn;
+
+  /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
+  /// allocated to hold a pointer to the hidden sret parameter.
+  unsigned DemoteRegister;
+
+  explicit FunctionLoweringInfo(TargetLowering &TLI);
+
+  /// set - Initialize this FunctionLoweringInfo with the given Function
+  /// and its associated MachineFunction.
+  ///
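+  /// A typical driver (illustrative sketch) calls this once per function and
+  /// clear() afterwards:
+  ///   FunctionLoweringInfo FuncInfo(TLI);
+  ///   FuncInfo.set(Fn, MF, EnableFastISel);
+  ///   // ... lower the function ...
+  ///   FuncInfo.clear();
+  ///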
+  void set(Function &Fn, MachineFunction &MF, bool EnableFastISel);
+
+  /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+  DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+  /// ValueMap - Since we emit code for the function a basic block at a time,
+  /// we must remember which virtual registers hold cross-basic-block values.
+  DenseMap<const Value*, unsigned> ValueMap;
+
+  /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+  /// the entry block.  This allows the allocas to be efficiently referenced
+  /// anywhere in the function.
+  DenseMap<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+  SmallSet<Instruction*, 8> CatchInfoLost;
+  SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+  unsigned MakeReg(EVT VT);
+  
+  /// isExportedInst - Return true if the specified value is an instruction
+  /// exported from its block.
+  bool isExportedInst(const Value *V) {
+    return ValueMap.count(V);
+  }
+
+  unsigned CreateRegForValue(const Value *V);
+  
+  unsigned InitializeRegForValue(const Value *V) {
+    unsigned &R = ValueMap[V];
+    assert(R == 0 && "Already initialized this value register!");
+    return R = CreateRegForValue(V);
+  }
+  
+  struct LiveOutInfo {
+    unsigned NumSignBits;
+    APInt KnownOne, KnownZero;
+    LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
+  };
+  
+  /// LiveOutRegInfo - Information about live out vregs, indexed by their
+  /// register number offset by 'FirstVirtualRegister'.
+  std::vector<LiveOutInfo> LiveOutRegInfo;
+
+  /// clear - Clear out all the function-specific state. This returns this
+  /// FunctionLoweringInfo to an empty state, ready to be used for a
+  /// different function.
+  void clear();
+};
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+                            const unsigned *Indices,
+                            const unsigned *IndicesEnd,
+                            unsigned CurIndex = 0);
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<uint64_t> *Offsets = 0,
+                     uint64_t StartingOffset = 0);
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *ExtractTypeInfo(Value *V);
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB);
+
+/// CopyCatchInfo - Copy catch information from SrcBB to DestBB.
+void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+                   MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 0000000..02fe85d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,701 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instr-emitter"
+#include "InstrEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional flag operands (which do
+/// not go into the resulting MachineInstr).
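+/// For example, a node with the value list (i32, i32, MVT::Other, MVT::Flag)
+/// has two results: the trailing flag and chain values are skipped.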
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+  unsigned N = Node->getNumValues();
+  while (N && Node->getValueType(N - 1) == MVT::Flag)
+    --N;
+  if (N && Node->getValueType(N - 1) == MVT::Other)
+    --N;    // Skip over chain result.
+  return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional flag operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
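+/// For example, an operand list (reg, reg, imm, chain, flag) counts three
+/// actual operands after the trailing chain and flag are ignored.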
+unsigned InstrEmitter::CountOperands(SDNode *Node) {
+  unsigned N = Node->getNumOperands();
+  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+    --N;
+  if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+    --N; // Ignore chain if it exists.
+  return N;
+}
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+                unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+  unsigned VRBase = 0;
+  if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+    // Just use the input register directly!
+    SDValue Op(Node, ResNo);
+    if (IsClone)
+      VRBaseMap.erase(Op);
+    bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+    isNew = isNew; // Silence compiler warning.
+    assert(isNew && "Node emitted out of order - early");
+    return;
+  }
+
+  // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+  // the CopyToReg'd destination register instead of creating a new vreg.
+  bool MatchReg = true;
+  const TargetRegisterClass *UseRC = NULL;
+  if (!IsClone && !IsCloned)
+    for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+         UI != E; ++UI) {
+      SDNode *User = *UI;
+      bool Match = true;
+      if (User->getOpcode() == ISD::CopyToReg && 
+          User->getOperand(2).getNode() == Node &&
+          User->getOperand(2).getResNo() == ResNo) {
+        unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+        if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+          VRBase = DestReg;
+          Match = false;
+        } else if (DestReg != SrcReg)
+          Match = false;
+      } else {
+        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+          SDValue Op = User->getOperand(i);
+          if (Op.getNode() != Node || Op.getResNo() != ResNo)
+            continue;
+          EVT VT = Node->getValueType(Op.getResNo());
+          if (VT == MVT::Other || VT == MVT::Flag)
+            continue;
+          Match = false;
+          if (User->isMachineOpcode()) {
+            const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
+            const TargetRegisterClass *RC = 0;
+            if (i+II.getNumDefs() < II.getNumOperands())
+              RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI);
+            if (!UseRC)
+              UseRC = RC;
+            else if (RC) {
+              const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC);
+              // If multiple uses expect disjoint register classes, we emit
+              // copies in AddRegisterOperand.
+              if (ComRC)
+                UseRC = ComRC;
+            }
+          }
+        }
+      }
+      MatchReg &= Match;
+      if (VRBase)
+        break;
+    }
+
+  EVT VT = Node->getValueType(ResNo);
+  const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+  SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT);
+  
+  // Figure out the register class to create for the destreg.
+  if (VRBase) {
+    DstRC = MRI->getRegClass(VRBase);
+  } else if (UseRC) {
+    assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+    DstRC = UseRC;
+  } else {
+    DstRC = TLI->getRegClassFor(VT);
+  }
+    
+  // If all uses are reading from the src physical register and copying the
+  // register is either impossible or very expensive, then don't create a copy.
+  if (MatchReg && SrcRC->getCopyCost() < 0) {
+    VRBase = SrcReg;
+  } else {
+    // Create the reg, emit the copy.
+    VRBase = MRI->createVirtualRegister(DstRC);
+    bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg,
+                                     DstRC, SrcRC);
+
+    assert(Emitted && "Unable to issue a copy instruction!\n");
+    (void) Emitted;
+  }
+
+  SDValue Op(Node, ResNo);
+  if (IsClone)
+    VRBaseMap.erase(Op);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+  isNew = isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number
+/// of node is a CopyToReg, return its destination register. Return 0
+/// otherwise.
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
+                                                unsigned ResNo) const {
+  if (!Node->hasOneUse())
+    return 0;
+
+  SDNode *User = *Node->use_begin();
+  if (User->getOpcode() == ISD::CopyToReg && 
+      User->getOperand(2).getNode() == Node &&
+      User->getOperand(2).getResNo() == ResNo) {
+    unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      return Reg;
+  }
+  return 0;
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+                                       const TargetInstrDesc &II,
+                                       bool IsClone, bool IsCloned,
+                                       DenseMap<SDValue, unsigned> &VRBaseMap) {
+  assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+         "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+  for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+    // If the specific node value is only used by a CopyToReg and the dest reg
+    // is a vreg in the same register class, use the CopyToReg'd destination
+    // register instead of creating a new vreg.
+    unsigned VRBase = 0;
+    const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI);
+    if (II.OpInfo[i].isOptionalDef()) {
+      // Optional def must be a physical register.
+      unsigned NumResults = CountResults(Node);
+      VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+      assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+      MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+    }
+
+    if (!VRBase && !IsClone && !IsCloned)
+      for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+           UI != E; ++UI) {
+        SDNode *User = *UI;
+        if (User->getOpcode() == ISD::CopyToReg && 
+            User->getOperand(2).getNode() == Node &&
+            User->getOperand(2).getResNo() == i) {
+          unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+          if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+            const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+            if (RegRC == RC) {
+              VRBase = Reg;
+              MI->addOperand(MachineOperand::CreateReg(Reg, true));
+              break;
+            }
+          }
+        }
+      }
+
+    // Create the result registers for this node and add the result regs to
+    // the machine instruction.
+    if (VRBase == 0) {
+      assert(RC && "Isn't a register operand!");
+      VRBase = MRI->createVirtualRegister(RC);
+      MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+    }
+
+    SDValue Op(Node, i);
+    if (IsClone)
+      VRBaseMap.erase(Op);
+    bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+    isNew = isNew; // Silence compiler warning.
+    assert(isNew && "Node emitted out of order - early");
+  }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+                             DenseMap<SDValue, unsigned> &VRBaseMap) {
+  if (Op.isMachineOpcode() &&
+      Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+    // Add an IMPLICIT_DEF instruction before every use.
+    unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+    // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc
+    // does not include operand register class info.
+    if (!VReg) {
+      const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+      VReg = MRI->createVirtualRegister(RC);
+    }
+    BuildMI(MBB, Op.getDebugLoc(),
+            TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+    return VReg;
+  }
+
+  DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+  assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+  return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+                                 unsigned IIOpNum,
+                                 const TargetInstrDesc *II,
+                                 DenseMap<SDValue, unsigned> &VRBaseMap) {
+  assert(Op.getValueType() != MVT::Other &&
+         Op.getValueType() != MVT::Flag &&
+         "Chain and flag operands should occur at end of operand list!");
+  // Get/emit the operand.
+  unsigned VReg = getVR(Op, VRBaseMap);
+  assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+  const TargetInstrDesc &TID = MI->getDesc();
+  bool isOptDef = IIOpNum < TID.getNumOperands() &&
+    TID.OpInfo[IIOpNum].isOptionalDef();
+
+  // If the instruction requires a register in a different class, create
+  // a new virtual register and copy the value into it.
+  if (II) {
+    const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+    const TargetRegisterClass *DstRC = 0;
+    if (IIOpNum < II->getNumOperands())
+      DstRC = II->OpInfo[IIOpNum].getRegClass(TRI);
+    assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
+           "Don't have operand info for this instruction!");
+    if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
+      unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+      bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
+                                       DstRC, SrcRC);
+      assert(Emitted && "Unable to issue a copy instruction!\n");
+      (void) Emitted;
+      VReg = NewVReg;
+    }
+  }
+
+  MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr.  II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for 
+/// assertions only.
+void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
+                              unsigned IIOpNum,
+                              const TargetInstrDesc *II,
+                              DenseMap<SDValue, unsigned> &VRBaseMap) {
+  if (Op.isMachineOpcode()) {
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+  } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
+  } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+    const ConstantFP *CFP = F->getConstantFPValue();
+    MI->addOperand(MachineOperand::CreateFPImm(CFP));
+  } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+  } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
+                                            TGA->getTargetFlags()));
+  } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+  } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+  } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
+                                             JT->getTargetFlags()));
+  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+    int Offset = CP->getOffset();
+    unsigned Align = CP->getAlignment();
+    const Type *Type = CP->getType();
+    // MachineConstantPool wants an explicit alignment.
+    if (Align == 0) {
+      Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+      if (Align == 0) {
+        // Alignment of vector types.  FIXME!
+        Align = TM->getTargetData()->getTypeAllocSize(Type);
+      }
+    }
+    
+    unsigned Idx;
+    MachineConstantPool *MCP = MF->getConstantPool();
+    if (CP->isMachineConstantPoolEntry())
+      Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+    else
+      Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
+                                             CP->getTargetFlags()));
+  } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
+                                            ES->getTargetFlags()));
+  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+                                            BA->getTargetFlags()));
+  } else {
+    assert(Op.getValueType() != MVT::Other &&
+           Op.getValueType() != MVT::Flag &&
+           "Chain and flag operands should occur at end of operand list!");
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+  }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
+/// "SubIdx"'th sub-register class is the specified register class and whose
+/// type matches the specified type.
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+                         unsigned SubIdx, EVT VT) {
+  // Pick the register class of the superregister for this type.
+  for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+         E = TRC->superregclasses_end(); I != E; ++I)
+    if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+      return *I;
+  assert(false && "Couldn't find the register class");
+  return 0;
+}
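+
+// Illustration: given a 32-bit GPR class and the (target-defined) index that
+// maps a 64-bit register onto its low 32 bits, this returns the 64-bit GPR
+// class, provided that class hasType() for the requested VT.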
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node, 
+                                  DenseMap<SDValue, unsigned> &VRBaseMap){
+  unsigned VRBase = 0;
+  unsigned Opc = Node->getMachineOpcode();
+  
+  // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+  // the CopyToReg'd destination register instead of creating a new vreg.
+  for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+       UI != E; ++UI) {
+    SDNode *User = *UI;
+    if (User->getOpcode() == ISD::CopyToReg && 
+        User->getOperand(2).getNode() == Node) {
+      unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+        VRBase = DestReg;
+        break;
+      }
+    }
+  }
+  
+  if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+    unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+
+    // Create the extract_subreg machine instruction.
+    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+                               TII->get(TargetOpcode::EXTRACT_SUBREG));
+
+    // Figure out the register class to create for the destreg.
+    unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+    const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+    const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+    assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+    // Note that if we're going to directly use an existing register,
+    // it must be precisely the required class, and not a subclass
+    // thereof.
+    if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+      // Create the reg
+      assert(SRC && "Couldn't find source register class");
+      VRBase = MRI->createVirtualRegister(SRC);
+    }
+
+    // Add def, source, and subreg index
+    MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+    AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+    MI->addOperand(MachineOperand::CreateImm(SubIdx));
+    MBB->insert(InsertPos, MI);
+  } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+             Opc == TargetOpcode::SUBREG_TO_REG) {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+    SDValue N2 = Node->getOperand(2);
+    unsigned SubReg = getVR(N1, VRBaseMap);
+    unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+    const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+    const TargetRegisterClass *SRC =
+      getSuperRegisterRegClass(TRC, SubIdx,
+                               Node->getValueType(0));
+
+    // Figure out the register class to create for the destreg.
+    // Note that if we're going to directly use an existing register,
+    // it must be precisely the required class, and not a subclass
+    // thereof.
+    if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+      // Create the reg
+      assert(SRC && "Couldn't find source register class");
+      VRBase = MRI->createVirtualRegister(SRC);
+    }
+
+    // Create the insert_subreg or subreg_to_reg machine instruction.
+    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
+    MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+    
+    // If creating a subreg_to_reg, then the first input operand
+    // is an implicit value immediate, otherwise it's a register
+    if (Opc == TargetOpcode::SUBREG_TO_REG) {
+      const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+      MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+    } else
+      AddOperand(MI, N0, 0, 0, VRBaseMap);
+    // Add the subregister being inserted.
+    AddOperand(MI, N1, 0, 0, VRBaseMap);
+    MI->addOperand(MachineOperand::CreateImm(SubIdx));
+    MBB->insert(InsertPos, MI);
+  } else
+    llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+     
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+  (void) isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
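+
+// The emitted machine instructions look roughly like this (hypothetical
+// vregs and a hypothetical sub-register index of 1):
+//   %reg1025 = EXTRACT_SUBREG %reg1024, 1
+//   %reg1027 = INSERT_SUBREG %reg1026, %reg1025, 1
+//   %reg1028 = SUBREG_TO_REG 0, %reg1025, 1
+// where the trailing immediate is the target sub-register index and the
+// leading immediate of SUBREG_TO_REG is the implicit value of the full
+// register.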
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+                                     DenseMap<SDValue, unsigned> &VRBaseMap) {
+  unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+  const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+
+  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+  const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+
+  // Create the new VReg in the destination class and emit a copy.
+  unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+  bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
+                                   DstRC, SrcRC);
+  assert(Emitted &&
+         "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
+  (void) Emitted;
+
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+  (void) isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitNode - Generate machine code for a node and needed dependencies.
+///
+void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+                            DenseMap<SDValue, unsigned> &VRBaseMap,
+                         DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+  // If this is a machine instruction node, emit it directly.
+  if (Node->isMachineOpcode()) {
+    unsigned Opc = Node->getMachineOpcode();
+    
+    // Handle subreg insert/extract specially
+    if (Opc == TargetOpcode::EXTRACT_SUBREG || 
+        Opc == TargetOpcode::INSERT_SUBREG ||
+        Opc == TargetOpcode::SUBREG_TO_REG) {
+      EmitSubregNode(Node, VRBaseMap);
+      return;
+    }
+
+    // Handle COPY_TO_REGCLASS specially.
+    if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+      EmitCopyToRegClassNode(Node, VRBaseMap);
+      return;
+    }
+
+    if (Opc == TargetOpcode::IMPLICIT_DEF)
+      // We want a unique VR for each IMPLICIT_DEF use.
+      return;
+    
+    const TargetInstrDesc &II = TII->get(Opc);
+    unsigned NumResults = CountResults(Node);
+    unsigned NodeOperands = CountOperands(Node);
+    bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
+                          II.getImplicitDefs() != 0;
+#ifndef NDEBUG
+    unsigned NumMIOperands = NodeOperands + NumResults;
+    assert((II.getNumOperands() == NumMIOperands ||
+            HasPhysRegOuts || II.isVariadic()) &&
+           "#operands for dag node doesn't match .td file!"); 
+#endif
+
+    // Create the new machine instruction.
+    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+    
+    // Add result register values for things that are defined by this
+    // instruction.
+    if (NumResults)
+      CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+    
+    // Emit all of the actual operands of this instruction, adding them to the
+    // instruction as appropriate.
+    bool HasOptPRefs = II.getNumDefs() > NumResults;
+    assert((!HasOptPRefs || !HasPhysRegOuts) &&
+           "Unable to cope with optional defs and phys regs defs!");
+    unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+    for (unsigned i = NumSkip; i != NodeOperands; ++i)
+      AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+                 VRBaseMap);
+
+    // Transfer all of the memory reference descriptions of this instruction.
+    MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+                   cast<MachineSDNode>(Node)->memoperands_end());
+
+    if (II.usesCustomInsertionHook()) {
+      // Insert this instruction into the basic block using a
+      // target-specific inserter, which may return a new basic block.
+      MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
+      InsertPos = MBB->end();
+    } else {
+      MBB->insert(InsertPos, MI);
+    }
+
+    // Additional results must be physical register defs.
+    if (HasPhysRegOuts) {
+      for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+        unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+        if (Node->hasAnyUseOfValue(i))
+          EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+        // If there are no uses, mark the register as dead now, so that
+        // MachineLICM/Sink can see that it's dead. Don't do this if the
+        // node has a Flag value, for the benefit of targets still using
+        // Flag for values in physregs.
+        else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+          MI->addRegisterDead(Reg, TRI);
+      }
+    }
+    return;
+  }
+
+  switch (Node->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    Node->dump();
+#endif
+    llvm_unreachable("This target-independent node should have been selected!");
+    break;
+  case ISD::EntryToken:
+    llvm_unreachable("EntryToken should have been excluded from the schedule!");
+    break;
+  case ISD::MERGE_VALUES:
+  case ISD::TokenFactor: // fall thru
+    break;
+  case ISD::CopyToReg: {
+    unsigned SrcReg;
+    SDValue SrcVal = Node->getOperand(2);
+    if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+      SrcReg = R->getReg();
+    else
+      SrcReg = getVR(SrcVal, VRBaseMap);
+      
+    unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+    if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+      break;
+      
+    const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0;
+    // Get the register classes of the src/dst.
+    if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+      SrcTRC = MRI->getRegClass(SrcReg);
+    else
+      SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType());
+
+    if (TargetRegisterInfo::isVirtualRegister(DestReg))
+      DstTRC = MRI->getRegClass(DestReg);
+    else
+      DstTRC = TRI->getPhysicalRegisterRegClass(DestReg,
+                                            Node->getOperand(1).getValueType());
+
+    bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
+                                     DstTRC, SrcTRC);
+    assert(Emitted && "Unable to issue a copy instruction!\n");
+    (void) Emitted;
+    break;
+  }
+  case ISD::CopyFromReg: {
+    unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+    EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+    break;
+  }
+  case ISD::INLINEASM: {
+    unsigned NumOps = Node->getNumOperands();
+    if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+      --NumOps;  // Ignore the flag operand.
+      
+    // Create the inline asm machine instruction.
+    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+                               TII->get(TargetOpcode::INLINEASM));
+
+    // Add the asm string as an external symbol operand.
+    const char *AsmStr =
+      cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+    MI->addOperand(MachineOperand::CreateES(AsmStr));
+      
+    // Add all of the operand registers to the instruction.
+    for (unsigned i = 2; i != NumOps;) {
+      unsigned Flags =
+        cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+      unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+        
+      MI->addOperand(MachineOperand::CreateImm(Flags));
+      ++i;  // Skip the ID value.
+        
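+      // The flag word encodes the operand kind in its low three bits
+      // (matched by the switch below); the count of values that follow is
+      // recovered from the remaining bits by getNumOperandRegisters above.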
+      switch (Flags & 7) {
+      default: llvm_unreachable("Bad flags!");
+      case 2:   // Def of register.
+        for (; NumVals; --NumVals, ++i) {
+          unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+          MI->addOperand(MachineOperand::CreateReg(Reg, true));
+        }
+        break;
+      case 6:   // Def of earlyclobber register.
+        for (; NumVals; --NumVals, ++i) {
+          unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+          MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, 
+                                                   false, false, true));
+        }
+        break;
+      case 1:  // Use of register.
+      case 3:  // Immediate.
+      case 4:  // Addressing mode.
+        // The addressing mode has been selected, just add all of the
+        // operands to the machine instruction.
+        for (; NumVals; --NumVals, ++i)
+          AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+        break;
+      }
+    }
+    MBB->insert(InsertPos, MI);
+    break;
+  }
+  }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+                           MachineBasicBlock::iterator insertpos)
+  : MF(mbb->getParent()),
+    MRI(&MF->getRegInfo()),
+    TM(&MF->getTarget()),
+    TII(TM->getInstrInfo()),
+    TRI(TM->getRegisterInfo()),
+    TLI(TM->getTargetLowering()),
+    MBB(mbb), InsertPos(insertpos) {
+}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 0000000..91817e4
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,119 @@
+//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTREMITTER_H
+#define INSTREMITTER_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class TargetInstrDesc;
+
+class InstrEmitter {
+  MachineFunction *MF;
+  MachineRegisterInfo *MRI;
+  const TargetMachine *TM;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const TargetLowering *TLI;
+
+  MachineBasicBlock *MBB;
+  MachineBasicBlock::iterator InsertPos;
+
+  /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+  /// implicit physical register output.
+  void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+                       bool IsClone, bool IsCloned,
+                       unsigned SrcReg,
+                       DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// getDstOfOnlyCopyToRegUse - If the only use of the specified result
+  /// number of the node is a CopyToReg, return its destination register.
+  /// Return 0 otherwise.
+  unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+                                    unsigned ResNo) const;
+
+  void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+                              const TargetInstrDesc &II,
+                              bool IsClone, bool IsCloned,
+                              DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// getVR - Return the virtual register corresponding to the specified result
+  /// of the specified node.
+  unsigned getVR(SDValue Op,
+                 DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// AddRegisterOperand - Add the specified register as an operand to the
+  /// specified machine instr. Insert register copies if the register is
+  /// not in the required register class.
+  void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+                          unsigned IIOpNum,
+                          const TargetInstrDesc *II,
+                          DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// AddOperand - Add the specified operand to the specified machine instr.  II
+  /// specifies the instruction information for the node, and IIOpNum is the
+  /// operand number (in the II) that we are adding. IIOpNum and II are used for
+  /// assertions only.
+  void AddOperand(MachineInstr *MI, SDValue Op,
+                  unsigned IIOpNum,
+                  const TargetInstrDesc *II,
+                  DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// EmitSubregNode - Generate machine code for subreg nodes.
+  ///
+  void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+  /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+  /// register is constrained to be in a particular register class.
+  ///
+  void EmitCopyToRegClassNode(SDNode *Node,
+                              DenseMap<SDValue, unsigned> &VRBaseMap);
+
+public:
+  /// CountResults - The results of target nodes have register or immediate
+  /// operands first, then an optional chain, and optional flag operands
+  /// (which do not go into the machine instrs).
+  static unsigned CountResults(SDNode *Node);
+
+  /// CountOperands - The inputs to target nodes have any actual inputs first,
+  /// followed by an optional chain operand, then flag operands.  Compute
+  /// the number of actual operands that will go into the resulting
+  /// MachineInstr.
+  static unsigned CountOperands(SDNode *Node);
+
+  /// EmitNode - Generate machine code for a node and needed dependencies.
+  ///
+  void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+                DenseMap<SDValue, unsigned> &VRBaseMap,
+                DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+
+  /// getBlock - Return the current basic block.
+  MachineBasicBlock *getBlock() { return MBB; }
+
+  /// getInsertPos - Return the current insertion position.
+  MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+  /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+  /// at the given position in the given block.
+  InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+};
+
+}
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 0000000..78e6e4e
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,3071 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it.  This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing.  For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the 'setcc' and branch instructions into
+/// 'brcc's.
+///
+namespace {
+class SelectionDAGLegalize {
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+  CodeGenOpt::Level OptLevel;
+
+  // Libcall insertion helpers.
+
+  /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+  /// legalized.  We use this to ensure that calls are properly serialized
+  /// against each other, including inserted libcalls.
+  SDValue LastCALLSEQ_END;
+
+  /// IsLegalizingCall - This member is used *only* for purposes of providing
+  /// helpful assertions that a libcall isn't created while another call is
+  /// being legalized (which could lead to non-serialized call sequences).
+  bool IsLegalizingCall;
+
+  enum LegalizeAction {
+    Legal,      // The target natively supports this operation.
+    Promote,    // This operation should be executed in a larger type.
+    Expand      // Try to expand this to other ops, otherwise use a libcall.
+  };
+
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// value type, where the two bits correspond to the LegalizeAction enum.
+  /// This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// LegalizedNodes - For nodes that are of legal width, and that have more
+  /// than one use, this map indicates what regularized operand to use.  This
+  /// allows us to avoid legalizing the same thing more than once.
+  DenseMap<SDValue, SDValue> LegalizedNodes;
+
+  void AddLegalizedOperand(SDValue From, SDValue To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return it unchanged.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+
+public:
+  SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+
+  /// getTypeAction - Return how we should legalize values of this type:
+  /// either it is already legal, we need to expand it into multiple registers
+  /// of smaller integer type, or we need to promote it to a larger type.
+  LegalizeAction getTypeAction(EVT VT) const {
+    return
+        (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT);
+  }
+
+  /// isTypeLegal - Return true if this type is legal on this target.
+  ///
+  bool isTypeLegal(EVT VT) const {
+    return getTypeAction(VT) == Legal;
+  }
+
+  void LegalizeDAG();
+
+private:
+  /// LegalizeOp - We know that the specified value has a legal type.
+  /// Recursively ensure that the operands have legal types, then return the
+  /// result.
+  SDValue LegalizeOp(SDValue O);
+
+  SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+  /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+  /// insertion index for the INSERT_VECTOR_ELT instruction.  In this case, it
+  /// is necessary to spill the vector being inserted into to memory, perform
+  /// the insert there, and then read the result back.
+  SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+                                         SDValue Idx, DebugLoc dl);
+  SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+                                  SDValue Idx, DebugLoc dl);
+
+  /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+  /// performs the same shuffle in terms of order of result bytes, but on a type
+  /// whose vector element type is narrower than the original shuffle type.
+  /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+  SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+                                     SDValue N1, SDValue N2, 
+                                     SmallVectorImpl<int> &Mask) const;
+
+  bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                    SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+  void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+                             DebugLoc dl);
+
+  SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+  SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+                          RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+                          RTLIB::Libcall Call_PPCF128);
+  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+                           RTLIB::Libcall Call_I8,
+                           RTLIB::Libcall Call_I16,
+                           RTLIB::Libcall Call_I32,
+                           RTLIB::Libcall Call_I64,
+                           RTLIB::Libcall Call_I128);
+
+  SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
+  SDValue ExpandBUILD_VECTOR(SDNode *Node);
+  SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+  void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+                                SmallVectorImpl<SDValue> &Results);
+  SDValue ExpandFCOPYSIGN(SDNode *Node);
+  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+                               DebugLoc dl);
+  SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+                                DebugLoc dl);
+  SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+                                DebugLoc dl);
+
+  SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+  SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+  SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+  SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+  void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+  void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue 
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT,  DebugLoc dl, 
+                                                 SDValue N1, SDValue N2,
+                                             SmallVectorImpl<int> &Mask) const {
+  unsigned NumMaskElts = VT.getVectorNumElements();
+  unsigned NumDestElts = NVT.getVectorNumElements();
+  unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+  assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+  if (NumEltsGrowth == 1)
+    return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+  
+  SmallVector<int, 8> NewMask;
+  for (unsigned i = 0; i != NumMaskElts; ++i) {
+    int Idx = Mask[i];
+    for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+      if (Idx < 0) 
+        NewMask.push_back(-1);
+      else
+        NewMask.push_back(Idx * NumEltsGrowth + j);
+    }
+  }
+  assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+  assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+  return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
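+
+// Worked example: widening a v2i64 shuffle mask <1, -1> for v4i32 (a growth
+// factor of 2) yields <2, 3, -1, -1>: each entry Idx expands to Idx*2 and
+// Idx*2+1, and undef (-1) entries stay undef in every widened slot.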
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
+                                           CodeGenOpt::Level ol)
+  : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
+    ValueTypeActions(TLI.getValueTypeActions()) {
+  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+         "Too many value types for ValueTypeActions to hold!");
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+  LastCALLSEQ_END = DAG.getEntryNode();
+  IsLegalizingCall = false;
+
+  // The legalize process is inherently a bottom-up recursive process (users
+  // legalize their uses before themselves).  Given infinite stack space, we
+  // could just start legalizing on the root and traverse the whole graph.  In
+  // practice however, this causes us to run out of stack space on large basic
+  // blocks.  To avoid this problem, compute an ordering of the nodes where each
+  // node is only legalized after all of its operands are legalized.
+  DAG.AssignTopologicalOrder();
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+    LegalizeOp(SDValue(I, 0));
+
+  // Finally, it's possible the root changed.  Get the new root.
+  SDValue OldRoot = DAG.getRoot();
+  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+  DAG.setRoot(LegalizedNodes[OldRoot]);
+
+  LegalizedNodes.clear();
+
+  // Remove dead nodes now.
+  DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node) {
+  if (Node->getOpcode() == ISD::CALLSEQ_END)
+    return Node;
+  if (Node->use_empty())
+    return 0;   // No CallSeqEnd
+
+  // The chain is usually at the end.
+  SDValue TheChain(Node, Node->getNumValues()-1);
+  if (TheChain.getValueType() != MVT::Other) {
+    // Sometimes it's at the beginning.
+    TheChain = SDValue(Node, 0);
+    if (TheChain.getValueType() != MVT::Other) {
+      // Otherwise, hunt for it.
+      for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+        if (Node->getValueType(i) == MVT::Other) {
+          TheChain = SDValue(Node, i);
+          break;
+        }
+
+      // Otherwise, we walked into a node without a chain.
+      if (TheChain.getValueType() != MVT::Other)
+        return 0;
+    }
+  }
+
+  for (SDNode::use_iterator UI = Node->use_begin(),
+       E = Node->use_end(); UI != E; ++UI) {
+
+    // Make sure to only follow users of our token chain.
+    SDNode *User = *UI;
+    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+      if (User->getOperand(i) == TheChain)
+        if (SDNode *Result = FindCallEndFromCallStart(User))
+          return Result;
+  }
+  return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+  assert(Node && "Didn't find callseq_start for a call??");
+  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+
+  assert(Node->getOperand(0).getValueType() == MVT::Other &&
+         "Node doesn't have a token chain argument!");
+  return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the operands of N, looking
+/// to see if any of them can reach Dest.  If none of N's operands lead to
+/// Dest, legalize them, legalize N itself, and return false; otherwise,
+/// return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo.  This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                     SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+  if (N == Dest) return true;  // N certainly leads to Dest :)
+
+  // If we've already processed this node and it does lead to Dest, there is no
+  // need to reprocess it.
+  if (NodesLeadingTo.count(N)) return true;
+
+  // If the first result of this node has been already legalized, then it cannot
+  // reach N.
+  if (LegalizedNodes.count(SDValue(N, 0))) return false;
+
+  // Okay, this node has not already been legalized.  Check and legalize all
+  // operands.  If none lead to Dest, then we can legalize this node.
+  bool OperandsLeadToDest = false;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    OperandsLeadToDest |=     // If an operand leads to Dest, so do we.
+      LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
+                                   NodesLeadingTo);
+
+  if (OperandsLeadToDest) {
+    NodesLeadingTo.insert(N);
+    return true;
+  }
+
+  // Okay, this node looks safe, legalize it and return false.
+  LegalizeOp(SDValue(N, 0));
+  return false;
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+                                SelectionDAG &DAG, const TargetLowering &TLI) {
+  bool Extend = false;
+  DebugLoc dl = CFP->getDebugLoc();
+
+  // If an FP immediate is precise when represented as a float and if the
+  // target can do an extending load from float to double, we put it into
+  // the constant pool as a float, even if it is statically typed as a
+  // double.  This shrinks FP constants and canonicalizes them for targets
+  // where an FP extending load is the same cost as a normal load (such as
+  // on the x87 fp stack or PPC FP unit).
+  EVT VT = CFP->getValueType(0);
+  ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+  if (!UseCP) {
+    assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+    return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+                           (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+  }
+
+  EVT OrigVT = VT;
+  EVT SVT = VT;
+  while (SVT != MVT::f32) {
+    SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+    if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
+        // Only do this if the target has a native EXTLOAD instruction from
+        // the smaller type.
+        TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+        TLI.ShouldShrinkFPConstant(OrigVT)) {
+      const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+      LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+      VT = SVT;
+      Extend = true;
+    }
+  }
+
+  SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+  if (Extend)
+    return DAG.getExtLoad(ISD::EXTLOAD, dl,
+                          OrigVT, DAG.getEntryNode(),
+                          CPIdx, PseudoSourceValue::getConstantPool(),
+                          0, VT, false, Alignment);
+  return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+                     PseudoSourceValue::getConstantPool(), 0, false, Alignment);
+}
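+
+// For example, a double 1.0 is exactly representable as a float, so on a
+// target with a cheap f32->f64 extending load it is placed in the constant
+// pool as an f32 and reloaded with an EXTLOAD; with UseCP false, the same
+// 1.0 is simply rewritten as the i64 constant 0x3FF0000000000000.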
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static
+SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+                             const TargetLowering &TLI) {
+  SDValue Chain = ST->getChain();
+  SDValue Ptr = ST->getBasePtr();
+  SDValue Val = ST->getValue();
+  EVT VT = Val.getValueType();
+  int Alignment = ST->getAlignment();
+  int SVOffset = ST->getSrcValueOffset();
+  DebugLoc dl = ST->getDebugLoc();
+  if (ST->getMemoryVT().isFloatingPoint() ||
+      ST->getMemoryVT().isVector()) {
+    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+    if (TLI.isTypeLegal(intVT)) {
+      // Expand to a bitconvert of the value to the integer type of the
+      // same size, then a (misaligned) int store.
+      // FIXME: Does not handle truncating floating point stores!
+      SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
+      return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
+                          SVOffset, ST->isVolatile(), Alignment);
+    } else {
+      // Do an (aligned) store to a stack slot, then copy from the stack slot
+      // to the final destination using (unaligned) integer loads and stores.
+      EVT StoredVT = ST->getMemoryVT();
+      EVT RegVT =
+        TLI.getRegisterType(*DAG.getContext(),
+                            EVT::getIntegerVT(*DAG.getContext(),
+                                              StoredVT.getSizeInBits()));
+      unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+      unsigned RegBytes = RegVT.getSizeInBits() / 8;
+      unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+      // Make sure the stack slot is also aligned for the register type.
+      SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+      // Perform the original store, only redirected to the stack slot.
+      SDValue Store = DAG.getTruncStore(Chain, dl,
+                                        Val, StackPtr, NULL, 0, StoredVT);
+      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+      SmallVector<SDValue, 8> Stores;
+      unsigned Offset = 0;
+
+      // Do all but one of the copies using the full register width.
+      for (unsigned i = 1; i < NumRegs; i++) {
+        // Load one integer register's worth from the stack slot.
+        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0);
+        // Store it to the final location.  Remember the store.
+        Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+                                      ST->getSrcValue(), SVOffset + Offset,
+                                      ST->isVolatile(),
+                                      MinAlign(ST->getAlignment(), Offset)));
+        // Increment the pointers.
+        Offset += RegBytes;
+        StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                               Increment);
+        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+      }
+
+      // The last store may be partial.  Do a truncating store.  On big-endian
+      // machines this requires an extending load from the stack slot to ensure
+      // that the bits are in the right place.
+      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+                                    8 * (StoredBytes - Offset));
+
+      // Load from the stack slot.
+      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+                                    NULL, 0, MemVT);
+
+      Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+                                         ST->getSrcValue(), SVOffset + Offset,
+                                         MemVT, ST->isVolatile(),
+                                         MinAlign(ST->getAlignment(), Offset)));
+      // The order of the stores doesn't matter - say it with a TokenFactor.
+      return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+                         Stores.size());
+    }
+  }
+  assert(ST->getMemoryVT().isInteger() &&
+         !ST->getMemoryVT().isVector() &&
+         "Unaligned store of unknown type.");
+  // Get the half-sized VT.
+  EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+  int NumBits = NewStoredVT.getSizeInBits();
+  int IncrementSize = NumBits / 8;
+
+  // Divide the stored value in two parts.
+  SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+  SDValue Lo = Val;
+  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+  // Store the two parts
+  SDValue Store1, Store2;
+  Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+                             ST->getSrcValue(), SVOffset, NewStoredVT,
+                             ST->isVolatile(), Alignment);
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+  Alignment = MinAlign(Alignment, IncrementSize);
+  Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+                             ST->getSrcValue(), SVOffset + IncrementSize,
+                             NewStoredVT, ST->isVolatile(), Alignment);
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+}
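+
+// Sketch of the integer path above: an unaligned i32 store becomes two i16
+// truncating stores on a little-endian target,
+//   store i16 (trunc Val),         Ptr       ; low half
+//   store i16 (trunc (Val >> 16)), Ptr + 2   ; high half
+// joined by a TokenFactor; big-endian targets store the halves swapped.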
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static
+SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+                            const TargetLowering &TLI) {
+  int SVOffset = LD->getSrcValueOffset();
+  SDValue Chain = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  EVT VT = LD->getValueType(0);
+  EVT LoadedVT = LD->getMemoryVT();
+  DebugLoc dl = LD->getDebugLoc();
+  if (VT.isFloatingPoint() || VT.isVector()) {
+    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+    if (TLI.isTypeLegal(intVT)) {
+      // Expand to a (misaligned) integer load of the same size,
+      // then bitconvert to floating point or vector.
+      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
+                                    SVOffset, LD->isVolatile(),
+                                    LD->getAlignment());
+      SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+      if (VT.isFloatingPoint() && LoadedVT != VT)
+        Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+
+      SDValue Ops[] = { Result, Chain };
+      return DAG.getMergeValues(Ops, 2, dl);
+    } else {
+      // Copy the value to an (aligned) stack slot using (unaligned) integer
+      // loads and stores, then do an (aligned) load from the stack slot.
+      EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+      unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+      unsigned RegBytes = RegVT.getSizeInBits() / 8;
+      unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+      // Make sure the stack slot is also aligned for the register type.
+      SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+      SmallVector<SDValue, 8> Stores;
+      SDValue StackPtr = StackBase;
+      unsigned Offset = 0;
+
+      // Do all but one of the copies using the full register width.
+      for (unsigned i = 1; i < NumRegs; i++) {
+        // Load one integer register's worth from the original location.
+        SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
+                                   SVOffset + Offset, LD->isVolatile(),
+                                   MinAlign(LD->getAlignment(), Offset));
+        // Follow the load with a store to the stack slot.  Remember the store.
+        Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+                                      NULL, 0));
+        // Increment the pointers.
+        Offset += RegBytes;
+        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+        StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                               Increment);
+      }
+
+      // The last copy may be partial.  Do an extending load.
+      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+                                    8 * (LoadedBytes - Offset));
+      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+                                    LD->getSrcValue(), SVOffset + Offset,
+                                    MemVT, LD->isVolatile(),
+                                    MinAlign(LD->getAlignment(), Offset));
+      // Follow the load with a store to the stack slot.  Remember the store.
+      // On big-endian machines this requires a truncating store to ensure
+      // that the bits end up in the right place.
+      Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+                                         NULL, 0, MemVT));
+
+      // The order of the stores doesn't matter - say it with a TokenFactor.
+      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+                               Stores.size());
+
+      // Finally, perform the original load only redirected to the stack slot.
+      Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+                            NULL, 0, LoadedVT);
+
+      // Callers expect a MERGE_VALUES node.
+      SDValue Ops[] = { Load, TF };
+      return DAG.getMergeValues(Ops, 2, dl);
+    }
+  }
+  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+         "Unaligned load of unsupported type.");
+
+  // Compute the new VT that is half the size of the old one.  This is an
+  // integer MVT.
+  unsigned NumBits = LoadedVT.getSizeInBits();
+  EVT NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
+  NumBits >>= 1;
+
+  unsigned Alignment = LD->getAlignment();
+  unsigned IncrementSize = NumBits / 8;
+  ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+  if (HiExtType == ISD::NON_EXTLOAD)
+    HiExtType = ISD::ZEXTLOAD;
+
+  // Load the value in two parts
+  SDValue Lo, Hi;
+  if (TLI.isLittleEndian()) {
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+                        MinAlign(Alignment, IncrementSize));
+  } else {
+    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+                        MinAlign(Alignment, IncrementSize));
+  }
+
+  // Aggregate the two parts.
+  SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                             Hi.getValue(1));
+
+  SDValue Ops[] = { Result, TF };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
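+
+// Sketch of the integer path above: an unaligned i32 load becomes, on a
+// little-endian target,
+//   Lo = zextload i16, Ptr
+//   Hi = extload  i16, Ptr + 2   ; sign/zero extension per the original load
+//   Result = (Hi << 16) | Lo
+// while big-endian targets load Hi from Ptr and Lo from Ptr + 2.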
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction.  In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+                               DebugLoc dl) {
+  SDValue Tmp1 = Vec;
+  SDValue Tmp2 = Val;
+  SDValue Tmp3 = Idx;
+
+  // If the target doesn't support this, we have to spill the input vector
+  // to a temporary stack slot, update the element, then reload it.  This is
+  // badness.  We could also load the value into a vector register (either
+  // with a "move to register" or "extload into register" instruction, then
+  // permute it into place, if the idx is a constant and if the idx is
+  // supported by the target.
+  EVT VT    = Tmp1.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  EVT IdxVT = Tmp3.getValueType();
+  EVT PtrVT = TLI.getPointerTy();
+  SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+  // Store the vector.
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+                            PseudoSourceValue::getFixedStack(SPFI), 0);
+
+  // Truncate or zero extend offset to target pointer type.
+  unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+  Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+  // Add the offset to the index.
+  unsigned EltSize = EltVT.getSizeInBits()/8;
+  Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
+                     DAG.getConstant(EltSize, IdxVT));
+  SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+  // Store the scalar value.
+  Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
+                         PseudoSourceValue::getFixedStack(SPFI), 0, EltVT);
+  // Load the updated vector.
+  return DAG.getLoad(VT, dl, Ch, StackPtr,
+                     PseudoSourceValue::getFixedStack(SPFI), 0);
+}
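+
+// In short, the memory sequence is: store Vec to a fresh stack slot, scale
+// Idx by the element size in bytes, truncating-store Val at
+// StackPtr + Idx*EltSize, then reload the whole vector from the slot.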
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+  if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+    // SCALAR_TO_VECTOR requires that the type of the value being inserted
+    // match the element type of the vector being created, except for
+    // integers, in which case the inserted value may be wider than the
+    // element type.
+    EVT EltVT = Vec.getValueType().getVectorElementType();
+    if (Val.getValueType() == EltVT ||
+        (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+      SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+                                  Vec.getValueType(), Val);
+
+      unsigned NumElts = Vec.getValueType().getVectorNumElements();
+      // We generate a shuffle of InVec and ScVec, so the shuffle mask
+      // should be 0,1,2,3,4,5... with the appropriate element replaced with
+      // elt 0 of the RHS.
+      SmallVector<int, 8> ShufOps;
+      for (unsigned i = 0; i != NumElts; ++i)
+        ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+      return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+                                  &ShufOps[0]);
+    }
+  }
+  return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
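+
+// For example, inserting a scalar into element 2 of a v4i32 with a constant
+// index builds the mask <0, 1, 4, 3>: slots 0, 1 and 3 come from the
+// original vector, and slot 2 takes element 0 of the SCALAR_TO_VECTOR
+// operand (element NumElts of the concatenated shuffle inputs).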
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+  // FIXME: We shouldn't do this for TargetConstantFP's.
+  // FIXME: move this to the DAG Combiner!  Note that we can't regress due
+  // to phase ordering between legalized code and the dag combiner.  This
+  // probably means that we need to integrate dag combiner and legalizer
+  // together.
+  // We generally can't do this one for long doubles.
+  SDValue Tmp1 = ST->getChain();
+  SDValue Tmp2 = ST->getBasePtr();
+  SDValue Tmp3;
+  int SVOffset = ST->getSrcValueOffset();
+  unsigned Alignment = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  DebugLoc dl = ST->getDebugLoc();
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+    if (CFP->getValueType(0) == MVT::f32 &&
+        getTypeAction(MVT::i32) == Legal) {
+      Tmp3 = DAG.getConstant(CFP->getValueAPF().
+                                      bitcastToAPInt().zextOrTrunc(32),
+                              MVT::i32);
+      return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                          SVOffset, isVolatile, Alignment);
+    } else if (CFP->getValueType(0) == MVT::f64) {
+      // If this target supports 64-bit registers, do a single 64-bit store.
+      if (getTypeAction(MVT::i64) == Legal) {
+        Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+                                  zextOrTrunc(64), MVT::i64);
+        return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                            SVOffset, isVolatile, Alignment);
+      } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+        // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+        // stores.  If the target supports neither 32- nor 64-bits, this
+        // xform is certainly not worth it.
+        const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+        SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+        SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+        if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+        Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
+                          SVOffset, isVolatile, Alignment);
+        Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                            DAG.getIntPtrConstant(4));
+        Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+                          isVolatile, MinAlign(Alignment, 4U));
+
+        return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+      }
+    }
+  }
+  return SDValue();
+}
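+
+// Concretely, 'store float 1.0, Ptr' becomes 'store i32 0x3F800000, Ptr';
+// 'store double 1.0, Ptr' becomes a single i64 store of 0x3FF0000000000000
+// where i64 is legal, or else two i32 stores of 0x00000000 and 0x3FF00000
+// (low word at Ptr on little-endian targets).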
+
+/// LegalizeOp - We know that the specified value has a legal type, and
+/// that its operands are legal.  Now ensure that the operation itself
+/// is legal, recursively ensuring that the operands' operations remain
+/// legal.
+SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
+  if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+    return Op;
+
+  SDNode *Node = Op.getNode();
+  DebugLoc dl = Node->getDebugLoc();
+
+  for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+    assert(getTypeAction(Node->getValueType(i)) == Legal &&
+           "Unexpected illegal type!");
+
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+    assert((isTypeLegal(Node->getOperand(i).getValueType()) || 
+            Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+           "Unexpected illegal type!");
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we must always cache transformed nodes.
+  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+  if (I != LegalizedNodes.end()) return I->second;
+
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+  SDValue Result = Op;
+  bool isCustom = false;
+
+  // Figure out the correct action; the way to query this varies by opcode.
+  TargetLowering::LegalizeAction Action;
+  bool SimpleFinishLegalizing = true;
+  switch (Node->getOpcode()) {
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID:
+  case ISD::VAARG:
+  case ISD::STACKSAVE:
+    Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+    break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::EXTRACT_VECTOR_ELT:
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(0).getValueType());
+    break;
+  case ISD::FP_ROUND_INREG:
+  case ISD::SIGN_EXTEND_INREG: {
+    EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+    break;
+  }
+  case ISD::SELECT_CC:
+  case ISD::SETCC:
+  case ISD::BR_CC: {
+    unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+                         Node->getOpcode() == ISD::SETCC ? 2 : 1;
+    unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+    EVT OpVT = Node->getOperand(CompareOperand).getValueType();
+    ISD::CondCode CCCode =
+        cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+    Action = TLI.getCondCodeAction(CCCode, OpVT);
+    if (Action == TargetLowering::Legal) {
+      if (Node->getOpcode() == ISD::SELECT_CC)
+        Action = TLI.getOperationAction(Node->getOpcode(),
+                                        Node->getValueType(0));
+      else
+        Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+    }
+    break;
+  }
+  case ISD::LOAD:
+  case ISD::STORE:
+    // FIXME: Model these properly.  LOAD and STORE are complicated, and
+    // STORE expects the unlegalized operand in some cases.
+    SimpleFinishLegalizing = false;
+    break;
+  case ISD::CALLSEQ_START:
+  case ISD::CALLSEQ_END:
+    // FIXME: This shouldn't be necessary.  These nodes have special properties
+    // dealing with the recursive nature of legalization.  Removing this
+    // special case should be done as part of making LegalizeDAG non-recursive.
+    SimpleFinishLegalizing = false;
+    break;
+  case ISD::EXTRACT_ELEMENT:
+  case ISD::FLT_ROUNDS_:
+  case ISD::SADDO:
+  case ISD::SSUBO:
+  case ISD::UADDO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+  case ISD::FPOWI:
+  case ISD::MERGE_VALUES:
+  case ISD::EH_RETURN:
+  case ISD::FRAME_TO_ARGS_OFFSET:
+    // These operations lie about being legal: when they claim to be legal,
+    // they should actually be expanded.
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    if (Action == TargetLowering::Legal)
+      Action = TargetLowering::Expand;
+    break;
+  case ISD::TRAMPOLINE:
+  case ISD::FRAMEADDR:
+  case ISD::RETURNADDR:
+    // These operations lie about being legal: when they claim to be legal,
+    // they should actually be custom-lowered.
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    if (Action == TargetLowering::Legal)
+      Action = TargetLowering::Custom;
+    break;
+  case ISD::BUILD_VECTOR:
+    // A weird case: legalization for BUILD_VECTOR never legalizes the
+    // operands!
+    // FIXME: This really sucks... changing it isn't semantically incorrect,
+    // but it massively pessimizes the code for floating-point BUILD_VECTORs
+    // because ConstantFP operands get legalized into constant pool loads
+    // before the BUILD_VECTOR code can see them.  It doesn't usually bite,
+    // though, because BUILD_VECTORS usually get lowered into other nodes
+    // which get legalized properly.
+    SimpleFinishLegalizing = false;
+    break;
+  default:
+    if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+      Action = TargetLowering::Legal;
+    } else {
+      Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    }
+    break;
+  }
+
+  if (SimpleFinishLegalizing) {
+    SmallVector<SDValue, 8> Ops, ResultVals;
+    for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+      Ops.push_back(LegalizeOp(Node->getOperand(i)));
+    switch (Node->getOpcode()) {
+    default: break;
+    case ISD::BR:
+    case ISD::BRIND:
+    case ISD::BR_JT:
+    case ISD::BR_CC:
+    case ISD::BRCOND:
+      // Branches tweak the chain to include LastCALLSEQ_END.
+      Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
+                            LastCALLSEQ_END);
+      Ops[0] = LegalizeOp(Ops[0]);
+      LastCALLSEQ_END = DAG.getEntryNode();
+      break;
+    case ISD::SHL:
+    case ISD::SRL:
+    case ISD::SRA:
+    case ISD::ROTL:
+    case ISD::ROTR:
+      // Legalizing shifts/rotates requires adjusting the shift amount
+      // to the appropriate width.
+      if (!Ops[1].getValueType().isVector())
+        Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+      break;
+    case ISD::SRL_PARTS:
+    case ISD::SRA_PARTS:
+    case ISD::SHL_PARTS:
+      // Legalizing shifts/rotates requires adjusting the shift amount
+      // to the appropriate width.
+      if (!Ops[2].getValueType().isVector())
+        Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2]));
+      break;
+    }
+
+    Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(),
+                                    Ops.size());
+    switch (Action) {
+    case TargetLowering::Legal:
+      for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+        ResultVals.push_back(Result.getValue(i));
+      break;
+    case TargetLowering::Custom:
+      // FIXME: The handling for custom lowering with multiple results is
+      // a complete mess.
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.getNode()) {
+        for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+          if (e == 1)
+            ResultVals.push_back(Tmp1);
+          else
+            ResultVals.push_back(Tmp1.getValue(i));
+        }
+        break;
+      }
+
+      // FALL THROUGH
+    case TargetLowering::Expand:
+      ExpandNode(Result.getNode(), ResultVals);
+      break;
+    case TargetLowering::Promote:
+      PromoteNode(Result.getNode(), ResultVals);
+      break;
+    }
+    if (!ResultVals.empty()) {
+      for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
+        if (ResultVals[i] != SDValue(Node, i))
+          ResultVals[i] = LegalizeOp(ResultVals[i]);
+        AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
+      }
+      return ResultVals[Op.getResNo()];
+    }
+  }
+
+  switch (Node->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "NODE: ";
+    Node->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to legalize this operator!");
+
+  case ISD::BUILD_VECTOR:
+    switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+    default: llvm_unreachable("This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.getNode()) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand:
+      Result = ExpandBUILD_VECTOR(Result.getNode());
+      break;
+    }
+    break;
+  case ISD::CALLSEQ_START: {
+    SDNode *CallEnd = FindCallEndFromCallStart(Node);
+
+    // Recursively legalize all of the inputs of the call end that do not lead
+    // to this call start.  This ensures that any libcalls that need to be
+    // inserted are inserted *before* the CALLSEQ_START.
+    {
+      SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+      for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+        LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
+                                     NodesLeadingTo);
+    }
+
+    // Now that we legalized all of the inputs (which may have inserted
+    // libcalls) create the new CALLSEQ_START node.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+
+    // Merge in the last call to ensure that this call starts after the last
+    // call ended.
+    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+      Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                         Tmp1, LastCALLSEQ_END);
+      Tmp1 = LegalizeOp(Tmp1);
+    }
+
+    // Do not try to legalize the target-specific arguments (#1+).
+    if (Tmp1 != Node->getOperand(0)) {
+      SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+      Ops[0] = Tmp1;
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+    }
+
+    // Remember that the CALLSEQ_START is legalized.
+    AddLegalizedOperand(Op.getValue(0), Result);
+    if (Node->getNumValues() == 2)    // If this has a flag result, remember it.
+      AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+    // Now that the callseq_start and all of the non-call nodes above this call
+    // sequence have been legalized, legalize the call itself.  During this
+    // process, no libcalls can/will be inserted, guaranteeing that no calls
+    // can overlap.
+    assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+    // Note that we are selecting this call!
+    LastCALLSEQ_END = SDValue(CallEnd, 0);
+    IsLegalizingCall = true;
+
+    // Legalize the call, starting from the CALLSEQ_END.
+    LegalizeOp(LastCALLSEQ_END);
+    assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+    return Result;
+  }
+  case ISD::CALLSEQ_END:
+    // If the CALLSEQ_START node hasn't been legalized first, legalize it.
+    // This will cause this node to be legalized as well and ensures that
+    // libcalls are handled correctly.
+    if (LastCALLSEQ_END.getNode() != Node) {
+      LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
+      DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+      assert(I != LegalizedNodes.end() &&
+             "Legalizing the call start should have legalized this node!");
+      return I->second;
+    }
+
+    // Otherwise, the call start has been legalized and everything is going
+    // according to plan.  Just legalize ourselves normally here.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Do not try to legalize the target-specific arguments (#1+), except for
+    // an optional flag input.
+    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+      if (Tmp1 != Node->getOperand(0)) {
+        SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+    } else {
+      Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+      if (Tmp1 != Node->getOperand(0) ||
+          Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+        SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Ops.back() = Tmp2;
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+    }
+    assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
+    // This finishes up call legalization.
+    IsLegalizingCall = false;
+
+    // If the CALLSEQ_END node has a flag, remember that we legalized it.
+    AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
+    if (Node->getNumValues() == 2)
+      AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.getResNo());
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    Tmp1 = LegalizeOp(LD->getChain());   // Legalize the chain.
+    Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+    ISD::LoadExtType ExtType = LD->getExtensionType();
+    if (ExtType == ISD::NON_EXTLOAD) {
+      EVT VT = Node->getValueType(0);
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+      Tmp3 = Result.getValue(0);
+      Tmp4 = Result.getValue(1);
+
+      switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+      default: llvm_unreachable("This action is not supported yet!");
+      case TargetLowering::Legal:
+        // If this is an unaligned load and the target doesn't support it,
+        // expand it.
+        if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+          const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+          unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+          if (LD->getAlignment() < ABIAlignment) {
+            Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), 
+                                         DAG, TLI);
+            Tmp3 = Result.getOperand(0);
+            Tmp4 = Result.getOperand(1);
+            Tmp3 = LegalizeOp(Tmp3);
+            Tmp4 = LegalizeOp(Tmp4);
+          }
+        }
+        break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+        if (Tmp1.getNode()) {
+          Tmp3 = LegalizeOp(Tmp1);
+          Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        }
+        break;
+      case TargetLowering::Promote: {
+        // Only promote a load of vector type to another.
+        assert(VT.isVector() && "Cannot promote this load!");
+        // Change base type to a different vector type.
+        EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+        Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+                           LD->getSrcValueOffset(),
+                           LD->isVolatile(), LD->getAlignment());
+        Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+        Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        break;
+      }
+      }
+      // Since loads produce two values, make sure to remember that we
+      // legalized both of them.
+      AddLegalizedOperand(SDValue(Node, 0), Tmp3);
+      AddLegalizedOperand(SDValue(Node, 1), Tmp4);
+      return Op.getResNo() ? Tmp4 : Tmp3;
+    } else {
+      EVT SrcVT = LD->getMemoryVT();
+      unsigned SrcWidth = SrcVT.getSizeInBits();
+      int SVOffset = LD->getSrcValueOffset();
+      unsigned Alignment = LD->getAlignment();
+      bool isVolatile = LD->isVolatile();
+
+      if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+          // Some targets pretend to have an i1 loading operation, and actually
+          // load an i8.  This trick is correct for ZEXTLOAD because the top 7
+          // bits are guaranteed to be zero; it helps the optimizers understand
+          // that these bits are zero.  It is also useful for EXTLOAD, since it
+          // tells the optimizers that those bits are undefined.  It would be
+          // nice to have an effective generic way of getting these benefits...
+          // Until such a way is found, don't insist on promoting i1 here.
+          (SrcVT != MVT::i1 ||
+           TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+        // Promote to a byte-sized load if not loading an integral number of
+        // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+        unsigned NewWidth = SrcVT.getStoreSizeInBits();
+        EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+        SDValue Ch;
+
+        // The extra bits are guaranteed to be zero, since we stored them that
+        // way.  A zext load from NVT thus automatically gives zext from SrcVT.
+
+        ISD::LoadExtType NewExtType =
+          ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+        Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+                                Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
+                                NVT, isVolatile, Alignment);
+
+        Ch = Result.getValue(1); // The chain.
+
+        if (ExtType == ISD::SEXTLOAD)
+          // Having the top bits zero doesn't help when sign extending.
+          Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                               Result.getValueType(),
+                               Result, DAG.getValueType(SrcVT));
+        else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+          // All the top bits are guaranteed to be zero - inform the optimizers.
+          Result = DAG.getNode(ISD::AssertZext, dl,
+                               Result.getValueType(), Result,
+                               DAG.getValueType(SrcVT));
+
+        Tmp1 = LegalizeOp(Result);
+        Tmp2 = LegalizeOp(Ch);
+      } else if (SrcWidth & (SrcWidth - 1)) {
+        // If not loading a power-of-2 number of bits, expand as two loads.
+        assert(!SrcVT.isVector() && "Unsupported extload!");
+        unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+        assert(RoundWidth < SrcWidth);
+        unsigned ExtraWidth = SrcWidth - RoundWidth;
+        assert(ExtraWidth < RoundWidth);
+        assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+               "Load size not an integral number of bytes!");
+        EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+        EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+        SDValue Lo, Hi, Ch;
+        unsigned IncrementSize;
+
+        if (TLI.isLittleEndian()) {
+          // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+          // Load the bottom RoundWidth bits.
+          Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+                              Node->getValueType(0), Tmp1, Tmp2,
+                              LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+                              Alignment);
+
+          // Load the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+                              LD->getSrcValue(), SVOffset + IncrementSize,
+                              ExtraVT, isVolatile,
+                              MinAlign(Alignment, IncrementSize));
+
+          // Build a factor node to remember that this load is independent of the
+          // other one.
+          Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                           Hi.getValue(1));
+
+          // Move the top bits to the right place.
+          Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+                           DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+
+          // Join the hi and lo parts.
+          Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+        } else {
+          // Big endian - avoid unaligned loads.
+          // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+          // Load the top RoundWidth bits.
+          Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+                              LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+                              Alignment);
+
+          // Load the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+                              Node->getValueType(0), Tmp1, Tmp2,
+                              LD->getSrcValue(), SVOffset + IncrementSize,
+                              ExtraVT, isVolatile,
+                              MinAlign(Alignment, IncrementSize));
+
+          // Build a factor node to remember that this load is independent of the
+          // other one.
+          Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                           Hi.getValue(1));
+
+          // Move the top bits to the right place.
+          Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+                           DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+
+          // Join the hi and lo parts.
+          Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+        }
+
+        Tmp1 = LegalizeOp(Result);
+        Tmp2 = LegalizeOp(Ch);
+      } else {
+        switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+        default: llvm_unreachable("This action is not supported yet!");
+        case TargetLowering::Custom:
+          isCustom = true;
+          // FALLTHROUGH
+        case TargetLowering::Legal:
+          Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+          Tmp1 = Result.getValue(0);
+          Tmp2 = Result.getValue(1);
+
+          if (isCustom) {
+            Tmp3 = TLI.LowerOperation(Result, DAG);
+            if (Tmp3.getNode()) {
+              Tmp1 = LegalizeOp(Tmp3);
+              Tmp2 = LegalizeOp(Tmp3.getValue(1));
+            }
+          } else {
+            // If this is an unaligned load and the target doesn't support it,
+            // expand it.
+            if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+              const Type *Ty =
+                LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+              unsigned ABIAlignment =
+                TLI.getTargetData()->getABITypeAlignment(Ty);
+              if (LD->getAlignment() < ABIAlignment) {
+                Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), 
+                                             DAG, TLI);
+                Tmp1 = Result.getOperand(0);
+                Tmp2 = Result.getOperand(1);
+                Tmp1 = LegalizeOp(Tmp1);
+                Tmp2 = LegalizeOp(Tmp2);
+              }
+            }
+          }
+          break;
+        case TargetLowering::Expand:
+          // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+          // f128 = EXTLOAD {f32,f64} too
+          if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 ||
+                                     Node->getValueType(0) == MVT::f128)) ||
+              (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) {
+            SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+                                       LD->getSrcValueOffset(),
+                                       LD->isVolatile(), LD->getAlignment());
+            Result = DAG.getNode(ISD::FP_EXTEND, dl,
+                                 Node->getValueType(0), Load);
+            Tmp1 = LegalizeOp(Result);  // Relegalize new nodes.
+            Tmp2 = LegalizeOp(Load.getValue(1));
+            break;
+          }
+          assert(ExtType != ISD::EXTLOAD &&
+                 "EXTLOAD should always be supported!");
+          // Turn the unsupported load into an EXTLOAD followed by an explicit
+          // zero/sign extend inreg.
+          Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+                                  Tmp1, Tmp2, LD->getSrcValue(),
+                                  LD->getSrcValueOffset(), SrcVT,
+                                  LD->isVolatile(), LD->getAlignment());
+          SDValue ValRes;
+          if (ExtType == ISD::SEXTLOAD)
+            ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                                 Result.getValueType(),
+                                 Result, DAG.getValueType(SrcVT));
+          else
+            ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
+          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes.
+          break;
+        }
+      }
+
+      // Since loads produce two values, make sure to remember that we legalized
+      // both of them.
+      AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+      AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+      return Op.getResNo() ? Tmp2 : Tmp1;
+    }
+  }
+  case ISD::STORE: {
+    StoreSDNode *ST = cast<StoreSDNode>(Node);
+    Tmp1 = LegalizeOp(ST->getChain());    // Legalize the chain.
+    Tmp2 = LegalizeOp(ST->getBasePtr());  // Legalize the pointer.
+    int SVOffset = ST->getSrcValueOffset();
+    unsigned Alignment = ST->getAlignment();
+    bool isVolatile = ST->isVolatile();
+
+    if (!ST->isTruncatingStore()) {
+      if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+        Result = SDValue(OptStore, 0);
+        break;
+      }
+
+      Tmp3 = LegalizeOp(ST->getValue());
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+                                      ST->getOffset());
+
+      EVT VT = Tmp3.getValueType();
+      switch (TLI.getOperationAction(ISD::STORE, VT)) {
+      default: llvm_unreachable("This action is not supported yet!");
+      case TargetLowering::Legal:
+        // If this is an unaligned store and the target doesn't support it,
+        // expand it.
+        if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+          const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+          unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+          if (ST->getAlignment() < ABIAlignment)
+            Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+                                          DAG, TLI);
+        }
+        break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.getNode()) Result = Tmp1;
+        break;
+      case TargetLowering::Promote:
+        assert(VT.isVector() && "Unknown legal promote case!");
+        Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+                           TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+        Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+                              ST->getSrcValue(), SVOffset, isVolatile,
+                              Alignment);
+        break;
+      }
+      break;
+    } else {
+      Tmp3 = LegalizeOp(ST->getValue());
+
+      EVT StVT = ST->getMemoryVT();
+      unsigned StWidth = StVT.getSizeInBits();
+
+      if (StWidth != StVT.getStoreSizeInBits()) {
+        // Promote to a byte-sized store with upper bits zero if not
+        // storing an integral number of bytes.  For example, promote
+        // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+        EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+                                    StVT.getStoreSizeInBits());
+        Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
+        Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                   SVOffset, NVT, isVolatile, Alignment);
+      } else if (StWidth & (StWidth - 1)) {
+        // If not storing a power-of-2 number of bits, expand as two stores.
+        assert(!StVT.isVector() && "Unsupported truncstore!");
+        unsigned RoundWidth = 1 << Log2_32(StWidth);
+        assert(RoundWidth < StWidth);
+        unsigned ExtraWidth = StWidth - RoundWidth;
+        assert(ExtraWidth < RoundWidth);
+        assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+               "Store size not an integral number of bytes!");
+        EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+        EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+        SDValue Lo, Hi;
+        unsigned IncrementSize;
+
+        if (TLI.isLittleEndian()) {
+          // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+          // Store the bottom RoundWidth bits.
+          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                 SVOffset, RoundVT,
+                                 isVolatile, Alignment);
+
+          // Store the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+                           DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+                                 SVOffset + IncrementSize, ExtraVT, isVolatile,
+                                 MinAlign(Alignment, IncrementSize));
+        } else {
+          // Big endian - avoid unaligned stores.
+          // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+          // Store the top RoundWidth bits.
+          Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+                           DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+                                 SVOffset, RoundVT, isVolatile, Alignment);
+
+          // Store the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                 SVOffset + IncrementSize, ExtraVT, isVolatile,
+                                 MinAlign(Alignment, IncrementSize));
+        }
+
+        // The order of the stores doesn't matter.
+        Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+      } else {
+        if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+            Tmp2 != ST->getBasePtr())
+          Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+                                          ST->getOffset());
+
+        switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+        default: llvm_unreachable("This action is not supported yet!");
+        case TargetLowering::Legal:
+          // If this is an unaligned store and the target doesn't support it,
+          // expand it.
+          if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+            const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+            unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+            if (ST->getAlignment() < ABIAlignment)
+              Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+                                            DAG, TLI);
+          }
+          break;
+        case TargetLowering::Custom:
+          Result = TLI.LowerOperation(Result, DAG);
+          break;
+        case TargetLowering::Expand:
+          // TRUNCSTORE:i16 i32 -> STORE i16
+          assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+          Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                SVOffset, isVolatile, Alignment);
+          break;
+        }
+      }
+    }
+    break;
+  }
+  }
+  assert(Result.getValueType() == Op.getValueType() &&
+         "Bad legalization!");
+
+  // Make sure that the generated code is itself legal.
+  if (Result != Op)
+    Result = LegalizeOp(Result);
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we must always cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
+  return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+  SDValue Vec = Op.getOperand(0);
+  SDValue Idx = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+  // Store the value to a temporary stack slot, then LOAD the returned part.
+  SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+  // Add the offset to the index.
+  unsigned EltSize =
+      Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+  Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+                    DAG.getConstant(EltSize, Idx.getValueType()));
+
+  if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+    Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+  else
+    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+  StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+  if (Op.getValueType().isVector())
+    return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
+  else
+    return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+                          NULL, 0, Vec.getValueType().getVectorElementType());
+}
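+
+// Illustrative note: extracting element 2 of a v4i32 stores the vector to a
+// 16-byte stack slot and reloads a single i32 from StackPtr + 2*4.  Because
+// the index is scaled and added at runtime, non-constant indices work too.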
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+  // We can't handle this case efficiently.  Allocate a sufficiently
+  // aligned object on the stack, store each element into it, then load
+  // the result as a vector.
+  // Create the stack frame object.
+  EVT VT = Node->getValueType(0);
+  EVT OpVT = Node->getOperand(0).getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue FIPtr = DAG.CreateStackTemporary(VT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+  // Emit a store of each element to the stack slot.
+  SmallVector<SDValue, 8> Stores;
+  unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+  // Store (in the right endianness) the elements to memory.
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    unsigned Offset = TypeByteSize*i;
+
+    SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+    // If EltVT is smaller than OpVT, only store the bits necessary.
+    if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) {
+      Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+                          Node->getOperand(i), Idx, SV, Offset, EltVT));
+    } else
+      Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, 
+                                    Node->getOperand(i), Idx, SV, Offset));
+  }
+
+  SDValue StoreChain;
+  if (!Stores.empty())    // Not all undef elements?
+    StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                             &Stores[0], Stores.size());
+  else
+    StoreChain = DAG.getEntryNode();
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+}
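+
+// Illustrative note: a BUILD_VECTOR of four i32 operands becomes stores at
+// offsets 0, 4, 8 and 12 into a 16-byte slot (undef operands are skipped),
+// joined by a TokenFactor, followed by a single v4i32 load of the slot.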
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Tmp1 = Node->getOperand(0);
+  SDValue Tmp2 = Node->getOperand(1);
+  assert((Tmp2.getValueType() == MVT::f32 ||
+          Tmp2.getValueType() == MVT::f64) &&
+          "Ugly special-cased code!");
+  // Get the sign bit of the RHS.
+  SDValue SignBit;
+  EVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
+  if (isTypeLegal(IVT)) {
+    SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+  } else {
+    assert(isTypeLegal(TLI.getPointerTy()) &&
+            (TLI.getPointerTy() == MVT::i32 || 
+            TLI.getPointerTy() == MVT::i64) &&
+            "Legal type for load?!");
+    SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType());
+    SDValue StorePtr = StackPtr, LoadPtr = StackPtr;
+    SDValue Ch =
+        DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0);
+    if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian())
+      LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
+                            LoadPtr, DAG.getIntPtrConstant(4));
+    SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(),
+                              Ch, LoadPtr, NULL, 0, MVT::i32);
+  }
+  SignBit =
+      DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+                    SignBit, DAG.getConstant(0, SignBit.getValueType()),
+                    ISD::SETLT);
+  // Get the absolute value of the result.
+  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+  // Select between the nabs and abs value based on the sign bit of
+  // the input.
+  return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+                     DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+                     AbsVal);
+}
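+
+// Illustrative note: for copysign(3.0, -1.0), the loaded sign bit makes the
+// SETLT compare true, so the SELECT picks FNEG(FABS(3.0)) and the result is
+// -3.0; with a non-negative second operand it picks FABS(3.0) instead.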
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+                                           SmallVectorImpl<SDValue> &Results) {
+  unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+  assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+          " not tell us which reg is the stack pointer!");
+  DebugLoc dl = Node->getDebugLoc();
+  EVT VT = Node->getValueType(0);
+  SDValue Tmp1 = SDValue(Node, 0);
+  SDValue Tmp2 = SDValue(Node, 1);
+  SDValue Tmp3 = Node->getOperand(2);
+  SDValue Chain = Tmp1.getOperand(0);
+
+  // Chain the dynamic stack allocation so that it doesn't modify the stack
+  // pointer when other instructions are using the stack.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+  SDValue Size  = Tmp2.getOperand(1);
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+  Chain = SP.getValue(1);
+  unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+  unsigned StackAlign =
+    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (Align > StackAlign)
+    SP = DAG.getNode(ISD::AND, dl, VT, SP,
+                      DAG.getConstant(-(uint64_t)Align, VT));
+  Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size);       // Value
+  Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1);     // Output chain
+
+  Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+                            DAG.getIntPtrConstant(0, true), SDValue());
+
+  Results.push_back(Tmp1);
+  Results.push_back(Tmp2);
+}
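+
+// Illustrative note: ignoring the CALLSEQ bracketing, the expansion above
+// computes SP &= -Align (only when Align exceeds the target's default stack
+// alignment), then Result = SP - Size and SP = Result, i.e. the block is
+// carved out of a downward-growing stack (the direction this expansion
+// assumes).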
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target. This routine expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+                                                 SDValue &LHS, SDValue &RHS,
+                                                 SDValue &CC,
+                                                 DebugLoc dl) {
+  EVT OpVT = LHS.getValueType();
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+  default: llvm_unreachable("Unknown condition code action!");
+  case TargetLowering::Legal:
+    // Nothing to do.
+    break;
+  case TargetLowering::Expand: {
+    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+    unsigned Opc = 0;
+    switch (CCCode) {
+    default: llvm_unreachable("Don't know how to expand this condition!");
+    case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    // FIXME: Implement more expansions.
+    }
+
+    SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+    SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+    RHS = SDValue();
+    CC  = SDValue();
+    break;
+  }
+  }
+}
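+
+// Illustrative note: on a target without SETONE, 'setcc one x, y' expands
+// per the table above to
+//   and (setcc setne x, y), (setcc seto x, y)
+// i.e. the operands are ordered AND their values differ.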
+
+/// EmitStackConvert - Emit a store/load combination to the stack.  This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed.  It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+                                               EVT SlotVT,
+                                               EVT DestVT,
+                                               DebugLoc dl) {
+  // Create the stack frame object.
+  unsigned SrcAlign =
+    TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+                                              getTypeForEVT(*DAG.getContext()));
+  SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+  int SPFI = StackPtrFI->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+  unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+  unsigned SlotSize = SlotVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+  unsigned DestAlign =
+    TLI.getTargetData()->getPrefTypeAlignment(
+      DestVT.getTypeForEVT(*DAG.getContext()));
+
+  // Emit a store to the stack slot.  Use a truncstore if the input value is
+  // larger than SlotVT.
+  SDValue Store;
+
+  if (SrcSize > SlotSize)
+    Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                              SV, 0, SlotVT, false, SrcAlign);
+  else {
+    assert(SrcSize == SlotSize && "Invalid store");
+    Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                         SV, 0, false, SrcAlign);
+  }
+
+  // Result is a load from the stack slot.
+  if (SlotSize == DestSize)
+    return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+
+  assert(SlotSize < DestSize && "Unknown extension!");
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+                        false, DestAlign);
+}
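+
+// Illustrative note: EmitStackConvert(Val, MVT::f64, MVT::i64, dl) emits an
+// f64 store and an i64 reload of the same slot -- the stack-based way to
+// move bits between register classes when no native bitcast is available.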
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+  DebugLoc dl = Node->getDebugLoc();
+  // Create a vector sized/aligned stack slot, store the value to element #0,
+  // then load the whole vector back out.
+  SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+  int SPFI = StackPtrFI->getIndex();
+
+  SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+                                 StackPtr,
+                                 PseudoSourceValue::getFixedStack(SPFI), 0,
+                                 Node->getValueType(0).getVectorElementType());
+  return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+                     PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+  unsigned NumElems = Node->getNumOperands();
+  SDValue Value1, Value2;
+  DebugLoc dl = Node->getDebugLoc();
+  EVT VT = Node->getValueType(0);
+  EVT OpVT = Node->getOperand(0).getValueType();
+  EVT EltVT = VT.getVectorElementType();
+
+  // If the only non-undef value is the low element, turn this into a
+  // SCALAR_TO_VECTOR node.  If this is { X, X, X, X }, determine X.
+  bool isOnlyLowElement = true;
+  bool MoreThanTwoValues = false;
+  bool isConstant = true;
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SDValue V = Node->getOperand(i);
+    if (V.getOpcode() == ISD::UNDEF)
+      continue;
+    if (i > 0)
+      isOnlyLowElement = false;
+    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+      isConstant = false;
+
+    if (!Value1.getNode()) {
+      Value1 = V;
+    } else if (!Value2.getNode()) {
+      if (V != Value1)
+        Value2 = V;
+    } else if (V != Value1 && V != Value2) {
+      MoreThanTwoValues = true;
+    }
+  }
+
+  if (!Value1.getNode())
+    return DAG.getUNDEF(VT);
+
+  if (isOnlyLowElement)
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+  // If all elements are constants, create a load from the constant pool.
+  if (isConstant) {
+    std::vector<Constant*> CV;
+    for (unsigned i = 0, e = NumElems; i != e; ++i) {
+      if (ConstantFPSDNode *V =
+          dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+        CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+      } else if (ConstantSDNode *V =
+                 dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+        if (OpVT == EltVT)
+          CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+        else {
+          // If OpVT and EltVT don't match, EltVT is not legal and the
+          // element values have been promoted/truncated earlier.  Undo this;
+          // we don't want a v16i8 to become a v16i32 for example.
+          const ConstantInt *CI = V->getConstantIntValue();
+          CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+                                        CI->getZExtValue()));
+        }
+      } else {
+        assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+        const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+        CV.push_back(UndefValue::get(OpNTy));
+      }
+    }
+    Constant *CP = ConstantVector::get(CV);
+    SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+    unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+    return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+                       PseudoSourceValue::getConstantPool(), 0,
+                       false, Alignment);
+  }
+
+  if (!MoreThanTwoValues) {
+    SmallVector<int, 8> ShuffleVec(NumElems, -1);
+    for (unsigned i = 0; i < NumElems; ++i) {
+      SDValue V = Node->getOperand(i);
+      if (V.getOpcode() == ISD::UNDEF)
+        continue;
+      ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+    }
+    if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+      // Get the splatted value into the low element of a vector register.
+      SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+      SDValue Vec2;
+      if (Value2.getNode())
+        Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+      else
+        Vec2 = DAG.getUNDEF(VT);
+
+      // Return shuffle(LowValVec, undef, <0,0,0,0>)
+      return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+    }
+  }
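+
+  // Illustrative note: for { X, Y, X, Y } the mask built above is <0,4,0,4>;
+  // mask indices 0..3 select from Vec1 (X in lane 0) and 4..7 from Vec2
+  // (Y in lane 0), so each element reads lane 0 of one of the two
+  // SCALAR_TO_VECTOR nodes.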
+
+  // Otherwise, we can't handle this case efficiently.
+  return ExpandVectorBuildThroughStack(Node);
+}
+
+// ExpandLibCall - Expand a node into a call to a libcall.  The node's
+// operands are passed as the call's arguments, and the value produced by
+// the call is returned.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+                                            bool isSigned) {
+  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+  // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically add the previous call to the
+  // dependence.
+  SDValue InChain = DAG.getEntryNode();
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                    0, TLI.getLibcallCallingConv(LC), false,
+                    /*isReturnValueUsed=*/true,
+                    Callee, Args, DAG,
+                    Node->getDebugLoc(), DAG.GetOrdering(Node));
+
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+  return CallInfo.first;
+}
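+
+// Illustrative note: expanding a 64-bit SDIV this way emits a call to
+// __divdi3 (the default RTLIB::SDIV_I64 name), with isSigned set so both
+// i64 operands are marked sign-extended.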
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+                                              RTLIB::Libcall Call_F32,
+                                              RTLIB::Libcall Call_F64,
+                                              RTLIB::Libcall Call_F80,
+                                              RTLIB::Libcall Call_PPCF128) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case MVT::f32: LC = Call_F32; break;
+  case MVT::f64: LC = Call_F64; break;
+  case MVT::f80: LC = Call_F80; break;
+  case MVT::ppcf128: LC = Call_PPCF128; break;
+  }
+  return ExpandLibCall(LC, Node, false);
+}
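+
+// Illustrative note: calling ExpandFPLibCall with RTLIB::SIN_F32/SIN_F64
+// maps an f32 FSIN to the 'sinf' libcall and an f64 FSIN to 'sin' (the
+// default RTLIB names).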
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+                                               RTLIB::Libcall Call_I8,
+                                               RTLIB::Libcall Call_I16,
+                                               RTLIB::Libcall Call_I32,
+                                               RTLIB::Libcall Call_I64,
+                                               RTLIB::Libcall Call_I128) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case MVT::i8:   LC = Call_I8; break;
+  case MVT::i16:  LC = Call_I16; break;
+  case MVT::i32:  LC = Call_I32; break;
+  case MVT::i64:  LC = Call_I64; break;
+  case MVT::i128: LC = Call_I128; break;
+  }
+  return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it.  At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+                                                   SDValue Op0,
+                                                   EVT DestVT,
+                                                   DebugLoc dl) {
+  if (Op0.getValueType() == MVT::i32) {
+    // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+    // Get the stack frame index of an 8-byte buffer.
+    SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+    // word offset constant for Hi/Lo address computation
+    SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+    // set up Hi and Lo (into buffer) address based on endian
+    SDValue Hi = StackSlot;
+    SDValue Lo = DAG.getNode(ISD::ADD, dl,
+                             TLI.getPointerTy(), StackSlot, WordOff);
+    if (TLI.isLittleEndian())
+      std::swap(Hi, Lo);
+
+    // if signed map to unsigned space
+    SDValue Op0Mapped;
+    if (isSigned) {
+      // constant used to invert sign bit (signed to unsigned mapping)
+      SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+      Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+    } else {
+      Op0Mapped = Op0;
+    }
+    // store the lo of the constructed double - based on integer input
+    SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+                                  Op0Mapped, Lo, NULL, 0);
+    // initial hi portion of constructed double
+    SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+    // store the hi of the constructed double - biased exponent
+    SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0);
+    // load the constructed double
+    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0);
+    // FP constant to bias correct the final result
+    SDValue Bias = DAG.getConstantFP(isSigned ?
+                                     BitsToDouble(0x4330000080000000ULL) :
+                                     BitsToDouble(0x4330000000000000ULL),
+                                     MVT::f64);
+    // subtract the bias
+    SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+    // final result
+    SDValue Result;
+    // handle final rounding
+    if (DestVT == MVT::f64) {
+      // do nothing
+      Result = Sub;
+    } else if (DestVT.bitsLT(MVT::f64)) {
+      Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+                           DAG.getIntPtrConstant(0));
+    } else if (DestVT.bitsGT(MVT::f64)) {
+      Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+    }
+    return Result;
+  }
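+  // The i32 path above exploits the IEEE double layout: a double whose high
+  // word is 0x43300000 and whose low word is x encodes 2^52 + x exactly, so
+  // subtracting the matching Bias (2^52, or 2^52 + 2^31 after the sign-bit
+  // flip in the signed case) leaves precisely x as an f64.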
+  assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+  SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+                                 Op0, DAG.getConstant(0, Op0.getValueType()),
+                                 ISD::SETLT);
+  SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+  SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+                                    SignSet, Four, Zero);
+
+  // If the sign bit of the integer is set, the large number will be treated
+  // as a negative number.  To counteract this, the dynamic code adds an
+  // offset depending on the data type.
+  uint64_t FF;
+  switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unsupported integer type!");
+  case MVT::i8 : FF = 0x43800000ULL; break;  // 2^8  (as a float)
+  case MVT::i16: FF = 0x47800000ULL; break;  // 2^16 (as a float)
+  case MVT::i32: FF = 0x4F800000ULL; break;  // 2^32 (as a float)
+  case MVT::i64: FF = 0x5F800000ULL; break;  // 2^64 (as a float)
+  }
+  if (TLI.isLittleEndian()) FF <<= 32;
+  Constant *FudgeFactor =
+    ConstantInt::get(Type::getInt64Ty(*DAG.getContext()), FF);
+
+  SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+  CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+  Alignment = std::min(Alignment, 4u);
+  SDValue FudgeInReg;
+  if (DestVT == MVT::f32)
+    FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+                             PseudoSourceValue::getConstantPool(), 0,
+                             false, Alignment);
+  else {
+    FudgeInReg =
+      LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+                                DAG.getEntryNode(), CPIdx,
+                                PseudoSourceValue::getConstantPool(), 0,
+                                MVT::f32, false, Alignment));
+  }
+
+  return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
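+
+// Illustrative note: in the fallback above, an i32 input with its top bit
+// set is misread by SINT_TO_FP as x - 2^32; the dynamically selected
+// constant-pool entry (offset 4 rather than 0) then adds 2^32 back as a
+// float, while non-negative inputs add 0.0.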
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+                                                    EVT DestVT,
+                                                    bool isSigned,
+                                                    DebugLoc dl) {
+  // First step, figure out the appropriate *INT_TO_FP operation to use.
+  EVT NewInTy = LegalOp.getValueType();
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+    assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+    // If the target supports SINT_TO_FP of this type, use it.
+    if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+      OpToUse = ISD::SINT_TO_FP;
+      break;
+    }
+    if (isSigned) continue;
+
+    // If the target supports UINT_TO_FP of this type, use it.
+    if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+      OpToUse = ISD::UINT_TO_FP;
+      break;
+    }
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.  Sign or zero extend our
+  // input to the desired type, then run the operation on it.
+  return DAG.getNode(OpToUse, dl, DestVT,
+                     DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+                                 dl, NewInTy, LegalOp));
+}
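+
+// E.g. a UINT_TO_FP from i16 on a target that only supports the i32 forms
+// becomes, conceptually, (float)(int32_t)(uint32_t)x: zero-extension
+// preserves the unsigned value, so the wider conversion is exact.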
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+                                                    EVT DestVT,
+                                                    bool isSigned,
+                                                    DebugLoc dl) {
+  // First step, figure out the appropriate FP_TO_*INT operation to use.
+  EVT NewOutTy = DestVT;
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+    assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+      OpToUse = ISD::FP_TO_SINT;
+      break;
+    }
+
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+      OpToUse = ISD::FP_TO_UINT;
+      break;
+    }
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.
+  SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+  // Truncate the result of the extended FP_TO_*INT operation to the desired
+  // size.
+  return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
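+
+// E.g. an FP_TO_UINT producing i8 on a target that only supports i32
+// results becomes, conceptually, (uint8_t)(int32_t)f: any value in range
+// for the narrow type is unchanged by the final truncation.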
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified value.
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+  EVT VT = Op.getValueType();
+  EVT SHVT = TLI.getShiftAmountTy();
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unhandled Expand type in BSWAP!");
+  case MVT::i16:
+    Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+  case MVT::i32:
+    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+  case MVT::i64:
+    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+  }
+}
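+
+// For reference, the i32 case above open-codes the familiar scalar idiom:
+//   uint32_t bswap32(uint32_t x) {
+//     return (x << 24) | ((x << 8) & 0x00FF0000) |
+//            ((x >> 8) & 0x0000FF00) | (x >> 24);
+//   }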
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+                                             DebugLoc dl) {
+  switch (Opc) {
+  default: llvm_unreachable("Cannot expand this yet!");
+  case ISD::CTPOP: {
+    static const uint64_t mask[6] = {
+      0x5555555555555555ULL, 0x3333333333333333ULL,
+      0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+      0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+    };
+    EVT VT = Op.getValueType();
+    EVT ShVT = TLI.getShiftAmountTy();
+    unsigned len = VT.getSizeInBits();
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      // x = (x & mask[i]) + ((x >> (1 << i)) & mask[i])
+      unsigned EltSize = VT.isVector() ?
+        VT.getVectorElementType().getSizeInBits() : len;
+      SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
+      SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::ADD, dl, VT,
+                       DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
+                       DAG.getNode(ISD::AND, dl, VT,
+                                   DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
+                                   Tmp2));
+    }
+    return Op;
+  }
+  case ISD::CTLZ: {
+    // for now, we do this:
+    // x = x | (x >> 1);
+    // x = x | (x >> 2);
+    // ...
+    // x = x | (x >>16);
+    // x = x | (x >>32); // for 64-bit input
+    // return popcount(~x);
+    //
+    // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+    EVT VT = Op.getValueType();
+    EVT ShVT = TLI.getShiftAmountTy();
+    unsigned len = VT.getSizeInBits();
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::OR, dl, VT, Op,
+                       DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+    }
+    Op = DAG.getNOT(dl, Op, VT);
+    return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+  }
+  case ISD::CTTZ: {
+    // for now, we use: { return popcount(~x & (x - 1)); }
+    // unless the target has ctlz but not ctpop, in which case we use:
+    // { return 32 - nlz(~x & (x-1)); }
+    // see also http://www.hackersdelight.org/HDcode/ntz.cc
+    EVT VT = Op.getValueType();
+    SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+                               DAG.getNOT(dl, Op, VT),
+                               DAG.getNode(ISD::SUB, dl, VT, Op,
+                                           DAG.getConstant(1, VT)));
+    // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+    if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+        TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+      return DAG.getNode(ISD::SUB, dl, VT,
+                         DAG.getConstant(VT.getSizeInBits(), VT),
+                         DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+    return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+  }
+  }
+}
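+
+// Worked CTTZ example: for x = 0b01100, x - 1 = 0b01011 and ~x = 0b10011,
+// so ~x & (x - 1) = 0b00011, which keeps exactly the bits below the lowest
+// set bit of x; popcount(0b00011) = 2, the number of trailing zeros.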
+
+void SelectionDAGLegalize::ExpandNode(SDNode *Node,
+                                      SmallVectorImpl<SDValue> &Results) {
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+  switch (Node->getOpcode()) {
+  case ISD::CTPOP:
+  case ISD::CTLZ:
+  case ISD::CTTZ:
+    Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::BSWAP:
+    Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+    break;
+  case ISD::FRAMEADDR:
+  case ISD::RETURNADDR:
+  case ISD::FRAME_TO_ARGS_OFFSET:
+    Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+    break;
+  case ISD::FLT_ROUNDS_:
+    Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+    break;
+  case ISD::EH_RETURN:
+  case ISD::EH_LABEL:
+  case ISD::PREFETCH:
+  case ISD::MEMBARRIER:
+  case ISD::VAEND:
+    Results.push_back(Node->getOperand(0));
+    break;
+  case ISD::DYNAMIC_STACKALLOC:
+    ExpandDYNAMIC_STACKALLOC(Node, Results);
+    break;
+  case ISD::MERGE_VALUES:
+    for (unsigned i = 0; i < Node->getNumValues(); i++)
+      Results.push_back(Node->getOperand(i));
+    break;
+  case ISD::UNDEF: {
+    EVT VT = Node->getValueType(0);
+    if (VT.isInteger())
+      Results.push_back(DAG.getConstant(0, VT));
+    else if (VT.isFloatingPoint())
+      Results.push_back(DAG.getConstantFP(0, VT));
+    else
+      llvm_unreachable("Unknown value type!");
+    break;
+  }
+  case ISD::TRAP: {
+    // If this operation is not supported, lower it to a call to abort().
+    TargetLowering::ArgListTy Args;
+    std::pair<SDValue, SDValue> CallResult =
+      TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+                      false, false, false, false, 0, CallingConv::C, false,
+                      /*isReturnValueUsed=*/true,
+                      DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+                      Args, DAG, dl, DAG.GetOrdering(Node));
+    Results.push_back(CallResult.second);
+    break;
+  }
+  case ISD::FP_ROUND:
+  case ISD::BIT_CONVERT:
+    Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+                            Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::FP_EXTEND:
+    Tmp1 = EmitStackConvert(Node->getOperand(0),
+                            Node->getOperand(0).getValueType(),
+                            Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::SIGN_EXTEND_INREG: {
+    // NOTE: we could fall back on load/store here too for targets without
+    // SAR.  However, it is doubtful that any exist.
+    EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    EVT VT = Node->getValueType(0);
+    EVT ShiftAmountTy = TLI.getShiftAmountTy();
+    if (VT.isVector())
+      ShiftAmountTy = VT;
+    unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+                        ExtraVT.getScalarType().getSizeInBits();
+    SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
+    Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+                       Node->getOperand(0), ShiftCst);
+    Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::FP_ROUND_INREG: {
+    // The only way we can lower this is to turn it into a TRUNCSTORE,
+    // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+    // NOTE: there is a choice here between constantly creating new stack
+    // slots and always reusing the same one.  We currently always create
+    // new ones, as reuse may inhibit scheduling.
+    EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+                            Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+                                Node->getOperand(0), Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::FP_TO_UINT: {
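+    // Expansion sketch (illustrative), for f64 -> u64 with 2^63 as the
+    // cutoff constant computed below:
+    //   r = f < 2^63 ? (uint64_t)(int64_t)f
+    //                : (uint64_t)(int64_t)(f - 2^63) ^ (1ULL << 63);
+    // i.e. fold the value into signed range, then fix up the sign bit.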
+    SDValue True, False;
+    EVT VT =  Node->getOperand(0).getValueType();
+    EVT NVT = Node->getValueType(0);
+    const uint64_t zero[] = {0, 0};
+    APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+    APInt x = APInt::getSignBit(NVT.getSizeInBits());
+    (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+    Tmp1 = DAG.getConstantFP(apf, VT);
+    Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+                        Node->getOperand(0),
+                        Tmp1, ISD::SETLT);
+    True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+    False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+                        DAG.getNode(ISD::FSUB, dl, VT,
+                                    Node->getOperand(0), Tmp1));
+    False = DAG.getNode(ISD::XOR, dl, NVT, False,
+                        DAG.getConstant(x, NVT));
+    Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::VAARG: {
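+    // Generic va_arg lowering; conceptually:
+    //   T *p = (T *)*ap;  *ap = (char *)*ap + sizeof(T);  return *p;
+    // with the va_list pointer itself living in memory at operand 1.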
+    const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+    EVT VT = Node->getValueType(0);
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0);
+    // Increment the pointer, VAList, to the next vaarg
+    Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+                       DAG.getConstant(TLI.getTargetData()->
+                                       getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
+                                       TLI.getPointerTy()));
+    // Store the incremented VAList to the legalized pointer
+    Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0);
+    // Load the actual argument out of the pointer VAList
+    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::VACOPY: {
+    // This defaults to loading a pointer from the input and storing it to the
+    // output, returning the chain.
+    const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+    const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+    Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+                       Node->getOperand(2), VS, 0);
+    Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::EXTRACT_VECTOR_ELT:
+    if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+      // This must be an access of the only element.  Return it.
+      Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), 
+                         Node->getOperand(0));
+    else
+      Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+    Results.push_back(Tmp1);
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+    break;
+  case ISD::CONCAT_VECTORS: {
+    Results.push_back(ExpandVectorBuildThroughStack(Node));
+    break;
+  }
+  case ISD::SCALAR_TO_VECTOR:
+    Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+                                              Node->getOperand(1),
+                                              Node->getOperand(2), dl));
+    break;
+  case ISD::VECTOR_SHUFFLE: {
+    SmallVector<int, 8> Mask;
+    cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+    EVT VT = Node->getValueType(0);
+    EVT EltVT = VT.getVectorElementType();
+    unsigned NumElems = VT.getVectorNumElements();
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0; i != NumElems; ++i) {
+      if (Mask[i] < 0) {
+        Ops.push_back(DAG.getUNDEF(EltVT));
+        continue;
+      }
+      unsigned Idx = Mask[i];
+      if (Idx < NumElems)
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                  Node->getOperand(0),
+                                  DAG.getIntPtrConstant(Idx)));
+      else
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                  Node->getOperand(1),
+                                  DAG.getIntPtrConstant(Idx - NumElems)));
+    }
+    Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::EXTRACT_ELEMENT: {
+    EVT OpTy = Node->getOperand(0).getValueType();
+    if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+      // 1 -> Hi
+      Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+                         DAG.getConstant(OpTy.getSizeInBits()/2,
+                                         TLI.getShiftAmountTy()));
+      Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+    } else {
+      // 0 -> Lo
+      Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+                         Node->getOperand(0));
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::STACKSAVE:
+    // Expand to CopyFromReg if the target set
+    // StackPointerRegisterToSaveRestore.
+    if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+      Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+                                           Node->getValueType(0)));
+      Results.push_back(Results[0].getValue(1));
+    } else {
+      Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+      Results.push_back(Node->getOperand(0));
+    }
+    break;
+  case ISD::STACKRESTORE:
+    // Expand to CopyToReg if the target set
+    // StackPointerRegisterToSaveRestore.
+    if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+      Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+                                         Node->getOperand(1)));
+    } else {
+      Results.push_back(Node->getOperand(0));
+    }
+    break;
+  case ISD::FCOPYSIGN:
+    Results.push_back(ExpandFCOPYSIGN(Node));
+    break;
+  case ISD::FNEG:
+    // Expand Y = FNEG(X) ->  Y = SUB -0.0, X
+    Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+    Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+                       Node->getOperand(0));
+    Results.push_back(Tmp1);
+    break;
+  case ISD::FABS: {
+    // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+    EVT VT = Node->getValueType(0);
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = DAG.getConstantFP(0.0, VT);
+    Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+                        Tmp1, Tmp2, ISD::SETUGT);
+    Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
+    Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::FSQRT:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+                                      RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128));
+    break;
+  case ISD::FSIN:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+                                      RTLIB::SIN_F80, RTLIB::SIN_PPCF128));
+    break;
+  case ISD::FCOS:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+                                      RTLIB::COS_F80, RTLIB::COS_PPCF128));
+    break;
+  case ISD::FLOG:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+                                      RTLIB::LOG_F80, RTLIB::LOG_PPCF128));
+    break;
+  case ISD::FLOG2:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+                                      RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128));
+    break;
+  case ISD::FLOG10:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+                                      RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128));
+    break;
+  case ISD::FEXP:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+                                      RTLIB::EXP_F80, RTLIB::EXP_PPCF128));
+    break;
+  case ISD::FEXP2:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+                                      RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128));
+    break;
+  case ISD::FTRUNC:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+                                      RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128));
+    break;
+  case ISD::FFLOOR:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+                                      RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128));
+    break;
+  case ISD::FCEIL:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+                                      RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128));
+    break;
+  case ISD::FRINT:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+                                      RTLIB::RINT_F80, RTLIB::RINT_PPCF128));
+    break;
+  case ISD::FNEARBYINT:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+                                      RTLIB::NEARBYINT_F64,
+                                      RTLIB::NEARBYINT_F80,
+                                      RTLIB::NEARBYINT_PPCF128));
+    break;
+  case ISD::FPOWI:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+                                      RTLIB::POWI_F80, RTLIB::POWI_PPCF128));
+    break;
+  case ISD::FPOW:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+                                      RTLIB::POW_F80, RTLIB::POW_PPCF128));
+    break;
+  case ISD::FDIV:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+                                      RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+    break;
+  case ISD::FREM:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+                                      RTLIB::REM_F80, RTLIB::REM_PPCF128));
+    break;
+  case ISD::ConstantFP: {
+    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+    // Check to see if this FP immediate is already legal.
+    // If so, leave the node alone; otherwise expand it into a load from
+    // the constant pool.
+    if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+      Results.push_back(SDValue(Node, 0));
+    else
+      Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+    break;
+  }
+  case ISD::EHSELECTION: {
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::EXCEPTIONADDR: {
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::SUB: {
+    EVT VT = Node->getValueType(0);
+    assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+           TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+           "Don't know how to expand this subtraction!");
+    Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+               DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+    Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+    Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+    break;
+  }
+  case ISD::UREM:
+  case ISD::SREM: {
+    EVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    bool isSigned = Node->getOpcode() == ISD::SREM;
+    unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    Tmp2 = Node->getOperand(0);
+    Tmp3 = Node->getOperand(1);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+    } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+      // X % Y -> X-X/Y*Y
+      Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+      Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+      Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+    } else if (isSigned) {
+      Tmp1 = ExpandIntLibCall(Node, true,
+                              RTLIB::SREM_I8,
+                              RTLIB::SREM_I16, RTLIB::SREM_I32,
+                              RTLIB::SREM_I64, RTLIB::SREM_I128);
+    } else {
+      Tmp1 = ExpandIntLibCall(Node, false,
+                              RTLIB::UREM_I8,
+                              RTLIB::UREM_I16, RTLIB::UREM_I32,
+                              RTLIB::UREM_I64, RTLIB::UREM_I128);
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::UDIV:
+  case ISD::SDIV: {
+    bool isSigned = Node->getOpcode() == ISD::SDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    EVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+                         Node->getOperand(1));
+    else if (isSigned)
+      Tmp1 = ExpandIntLibCall(Node, true,
+                              RTLIB::SDIV_I8,
+                              RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+                              RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+    else
+      Tmp1 = ExpandIntLibCall(Node, false,
+                              RTLIB::UDIV_I8,
+                              RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+                              RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::MULHU:
+  case ISD::MULHS: {
+    unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+                                                              ISD::SMUL_LOHI;
+    EVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+           "If this wasn't legal, it shouldn't have been created!");
+    Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+                       Node->getOperand(1));
+    Results.push_back(Tmp1.getValue(1));
+    break;
+  }
+  case ISD::MUL: {
+    EVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    // See if multiply or divide can be lowered using two-result operations.
+    // We just need the low half of the multiply; try both the signed
+    // and unsigned forms. If the target supports both SMUL_LOHI and
+    // UMUL_LOHI, form a preference by checking which forms of plain
+    // MULH it supports.
+    bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+    bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+    bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+    bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+    unsigned OpToUse = 0;
+    if (HasSMUL_LOHI && !HasMULHS) {
+      OpToUse = ISD::SMUL_LOHI;
+    } else if (HasUMUL_LOHI && !HasMULHU) {
+      OpToUse = ISD::UMUL_LOHI;
+    } else if (HasSMUL_LOHI) {
+      OpToUse = ISD::SMUL_LOHI;
+    } else if (HasUMUL_LOHI) {
+      OpToUse = ISD::UMUL_LOHI;
+    }
+    if (OpToUse) {
+      Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+                                    Node->getOperand(1)));
+      break;
+    }
+    Tmp1 = ExpandIntLibCall(Node, false,
+                            RTLIB::MUL_I8,
+                            RTLIB::MUL_I16, RTLIB::MUL_I32,
+                            RTLIB::MUL_I64, RTLIB::MUL_I128);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SADDO:
+  case ISD::SSUBO: {
+    SDValue LHS = Node->getOperand(0);
+    SDValue RHS = Node->getOperand(1);
+    SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+                              ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+                              LHS, RHS);
+    Results.push_back(Sum);
+    EVT OType = Node->getValueType(1);
+
+    SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+    //   LHSSign -> LHS >= 0
+    //   RHSSign -> RHS >= 0
+    //   SumSign -> Sum >= 0
+    //
+    //   Add:
+    //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+    //   Sub:
+    //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+    //
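+    //   Worked i8 example: 100 + 100 wraps to -56.  Both operands are
+    //   non-negative (LHSSign == RHSSign) while the sum is negative
+    //   (LHSSign != SumSign), so overflow is reported.
+    //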
+    SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+    SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+    SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+                                      Node->getOpcode() == ISD::SADDO ?
+                                      ISD::SETEQ : ISD::SETNE);
+
+    SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+    SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+    SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+    Results.push_back(Cmp);
+    break;
+  }
+  case ISD::UADDO:
+  case ISD::USUBO: {
+    SDValue LHS = Node->getOperand(0);
+    SDValue RHS = Node->getOperand(1);
+    SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+                              ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+                              LHS, RHS);
+    Results.push_back(Sum);
+    Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
+                                   Node->getOpcode() == ISD::UADDO ?
+                                   ISD::SETULT : ISD::SETUGT));
+    break;
+  }
+  case ISD::UMULO:
+  case ISD::SMULO: {
+    EVT VT = Node->getValueType(0);
+    SDValue LHS = Node->getOperand(0);
+    SDValue RHS = Node->getOperand(1);
+    SDValue BottomHalf;
+    SDValue TopHalf;
+    static const unsigned Ops[2][3] =
+        { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+          { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+    bool isSigned = Node->getOpcode() == ISD::SMULO;
+    if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+      BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+      TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+    } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+      BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+                               RHS);
+      TopHalf = BottomHalf.getValue(1);
+    } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+                                                 VT.getSizeInBits() * 2))) {
+      EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+      LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+      RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+      Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+      BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+                               DAG.getIntPtrConstant(0));
+      TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+                            DAG.getIntPtrConstant(1));
+    } else {
+      // FIXME: We should be able to fall back to a libcall with an illegal
+      // type in some cases.
+      // Also, we can fall back to a division in some cases, but that's a big
+      // performance hit in the general case.
+      llvm_unreachable("Don't know how to expand this operation yet!");
+    }
+    if (isSigned) {
+      Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+      Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+      TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+                             ISD::SETNE);
+    } else {
+      TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+                             DAG.getConstant(0, VT), ISD::SETNE);
+    }
+    Results.push_back(BottomHalf);
+    Results.push_back(TopHalf);
+    break;
+  }
+  case ISD::BUILD_PAIR: {
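+    // E.g. pairing two i32 halves into an i64, conceptually:
+    //   result = (uint64_t)Lo | ((uint64_t)Hi << 32);
+    // Lo is zero-extended so it cannot smear into the high half; Hi may be
+    // any-extended because its extension bits are shifted out anyway.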
+    EVT PairTy = Node->getValueType(0);
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+    Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
+                       DAG.getConstant(PairTy.getSizeInBits()/2,
+                                       TLI.getShiftAmountTy()));
+    Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+    break;
+  }
+  case ISD::SELECT:
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    Tmp3 = Node->getOperand(2);
+    if (Tmp1.getOpcode() == ISD::SETCC) {
+      Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+                             Tmp2, Tmp3,
+                             cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+    } else {
+      Tmp1 = DAG.getSelectCC(dl, Tmp1,
+                             DAG.getConstant(0, Tmp1.getValueType()),
+                             Tmp2, Tmp3, ISD::SETNE);
+    }
+    Results.push_back(Tmp1);
+    break;
+  case ISD::BR_JT: {
+    SDValue Chain = Node->getOperand(0);
+    SDValue Table = Node->getOperand(1);
+    SDValue Index = Node->getOperand(2);
+
+    EVT PTy = TLI.getPointerTy();
+
+    const TargetData &TD = *TLI.getTargetData();
+    unsigned EntrySize =
+      DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
+    
+    Index = DAG.getNode(ISD::MUL, dl, PTy,
+                        Index, DAG.getConstant(EntrySize, PTy));
+    SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+
+    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+    SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+                                PseudoSourceValue::getJumpTable(), 0, MemVT);
+    Addr = LD;
+    if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+      // For PIC, the sequence is:
+      // BRIND(load(Jumptable + index) + RelocBase)
+      // RelocBase can be JumpTable, GOT or some sort of global base.
+      Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+                          TLI.getPICJumpTableRelocBase(Table, DAG));
+    }
+    Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::BRCOND:
+    // Expand brcond's setcc into its constituent parts and create a BR_CC
+    // Node.
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    if (Tmp2.getOpcode() == ISD::SETCC) {
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+                         Tmp1, Tmp2.getOperand(2),
+                         Tmp2.getOperand(0), Tmp2.getOperand(1),
+                         Node->getOperand(2));
+    } else {
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+                         DAG.getCondCode(ISD::SETNE), Tmp2,
+                         DAG.getConstant(0, Tmp2.getValueType()),
+                         Node->getOperand(2));
+    }
+    Results.push_back(Tmp1);
+    break;
+  case ISD::SETCC: {
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    Tmp3 = Node->getOperand(2);
+    LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+
+    // If we expanded the SETCC into an AND/OR, return the new node
+    if (Tmp2.getNode() == 0) {
+      Results.push_back(Tmp1);
+      break;
+    }
+
+    // Otherwise, SETCC for the given comparison type must be completely
+    // illegal; expand it into a SELECT_CC.
+    EVT VT = Node->getValueType(0);
+    Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SELECT_CC: {
+    Tmp1 = Node->getOperand(0);   // LHS
+    Tmp2 = Node->getOperand(1);   // RHS
+    Tmp3 = Node->getOperand(2);   // True
+    Tmp4 = Node->getOperand(3);   // False
+    SDValue CC = Node->getOperand(4);
+
+    LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
+                          Tmp1, Tmp2, CC, dl);
+
+    assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
+    Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+    CC = DAG.getCondCode(ISD::SETNE);
+    Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+                       Tmp3, Tmp4, CC);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::BR_CC: {
+    Tmp1 = Node->getOperand(0);              // Chain
+    Tmp2 = Node->getOperand(2);              // LHS
+    Tmp3 = Node->getOperand(3);              // RHS
+    Tmp4 = Node->getOperand(1);              // CC
+
+    LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
+                          Tmp2, Tmp3, Tmp4, dl);
+    LastCALLSEQ_END = DAG.getEntryNode();
+
+    assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
+    Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+    Tmp4 = DAG.getCondCode(ISD::SETNE);
+    Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+                       Tmp3, Node->getOperand(4));
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::GLOBAL_OFFSET_TABLE:
+  case ISD::GlobalAddress:
+  case ISD::GlobalTLSAddress:
+  case ISD::ExternalSymbol:
+  case ISD::ConstantPool:
+  case ISD::JumpTable:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID:
+    // FIXME: Custom lowering for these operations shouldn't return null!
+    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+      Results.push_back(SDValue(Node, i));
+    break;
+  }
+}
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node,
+                                       SmallVectorImpl<SDValue> &Results) {
+  EVT OVT = Node->getValueType(0);
+  if (Node->getOpcode() == ISD::UINT_TO_FP ||
+      Node->getOpcode() == ISD::SINT_TO_FP ||
+      Node->getOpcode() == ISD::SETCC) {
+    OVT = Node->getOperand(0).getValueType();
+  }
+  EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Tmp1, Tmp2, Tmp3;
+  switch (Node->getOpcode()) {
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+    // Zero extend the argument.
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+    // Perform the larger operation.
+    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+    if (Node->getOpcode() == ISD::CTTZ) {
+      // if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+      Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
+                          Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+                          ISD::SETEQ);
+      Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+                          DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+    } else if (Node->getOpcode() == ISD::CTLZ) {
+      // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+      Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+                          DAG.getConstant(NVT.getSizeInBits() -
+                                          OVT.getSizeInBits(), NVT));
+    }
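+    // E.g. an i8 CTLZ promoted to i32: ctlz on the zero-extended value
+    // counts 32 - 8 = 24 extra leading zeros from the widening, which the
+    // subtraction above removes before truncating back to i8.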
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+    break;
+  case ISD::BSWAP: {
+    unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+    Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+    Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+                          DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::FP_TO_UINT:
+  case ISD::FP_TO_SINT:
+    Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+                                 Node->getOpcode() == ISD::FP_TO_SINT, dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::UINT_TO_FP:
+  case ISD::SINT_TO_FP:
+    Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+                                 Node->getOpcode() == ISD::SINT_TO_FP, dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: {
+    unsigned ExtOp, TruncOp;
+    if (OVT.isVector()) {
+      ExtOp   = ISD::BIT_CONVERT;
+      TruncOp = ISD::BIT_CONVERT;
+    } else if (OVT.isInteger()) {
+      ExtOp   = ISD::ANY_EXTEND;
+      TruncOp = ISD::TRUNCATE;
+    } else {
+      llvm_report_error("Cannot promote logic operation");
+    }
+    // Promote each of the values to the new type.
+    Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+    // Perform the larger operation, then convert back
+    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+    Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
+    break;
+  }
+  case ISD::SELECT: {
+    unsigned ExtOp, TruncOp;
+    if (Node->getValueType(0).isVector()) {
+      ExtOp   = ISD::BIT_CONVERT;
+      TruncOp = ISD::BIT_CONVERT;
+    } else if (Node->getValueType(0).isInteger()) {
+      ExtOp   = ISD::ANY_EXTEND;
+      TruncOp = ISD::TRUNCATE;
+    } else {
+      ExtOp   = ISD::FP_EXTEND;
+      TruncOp = ISD::FP_ROUND;
+    }
+    Tmp1 = Node->getOperand(0);
+    // Promote each of the values to the new type.
+    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+    Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+    // Perform the larger operation, then round down.
+    Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+    if (TruncOp != ISD::FP_ROUND)
+      Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+    else
+      Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+                         DAG.getIntPtrConstant(0));
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    SmallVector<int, 8> Mask;
+    cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+    // Cast the two input vectors.
+    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+
+    // Convert the shuffle mask to the right # elements.
+    Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SETCC: {
+    unsigned ExtOp = ISD::FP_EXTEND;
+    if (NVT.isInteger()) {
+      ISD::CondCode CCCode =
+        cast<CondCodeSDNode>(Node->getOperand(2))->get();
+      ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    }
+    Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+    Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+                                  Tmp1, Tmp2, Node->getOperand(2)));
+    break;
+  }
+  }
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) {
+  // Construct a SelectionDAGLegalize and use it to legalize this DAG.
+  SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 0000000..4f0fce7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1387 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float".  For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type.  For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(EVT VT,
+                                   RTLIB::Libcall Call_F32,
+                                   RTLIB::Libcall Call_F64,
+                                   RTLIB::Libcall Call_F80,
+                                   RTLIB::Libcall Call_PPCF128) {
+  return
+    VT == MVT::f32 ? Call_F32 :
+    VT == MVT::f64 ? Call_F64 :
+    VT == MVT::f80 ? Call_F80 :
+    VT == MVT::ppcf128 ? Call_PPCF128 :
+    RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+//  Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue R = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to soften the result of this operator!");
+
+    case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+    case ISD::BUILD_PAIR:  R = SoftenFloatRes_BUILD_PAIR(N); break;
+    case ISD::ConstantFP:
+      R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
+      break;
+    case ISD::EXTRACT_VECTOR_ELT:
+      R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+    case ISD::FABS:        R = SoftenFloatRes_FABS(N); break;
+    case ISD::FADD:        R = SoftenFloatRes_FADD(N); break;
+    case ISD::FCEIL:       R = SoftenFloatRes_FCEIL(N); break;
+    case ISD::FCOPYSIGN:   R = SoftenFloatRes_FCOPYSIGN(N); break;
+    case ISD::FCOS:        R = SoftenFloatRes_FCOS(N); break;
+    case ISD::FDIV:        R = SoftenFloatRes_FDIV(N); break;
+    case ISD::FEXP:        R = SoftenFloatRes_FEXP(N); break;
+    case ISD::FEXP2:       R = SoftenFloatRes_FEXP2(N); break;
+    case ISD::FFLOOR:      R = SoftenFloatRes_FFLOOR(N); break;
+    case ISD::FLOG:        R = SoftenFloatRes_FLOG(N); break;
+    case ISD::FLOG2:       R = SoftenFloatRes_FLOG2(N); break;
+    case ISD::FLOG10:      R = SoftenFloatRes_FLOG10(N); break;
+    case ISD::FMUL:        R = SoftenFloatRes_FMUL(N); break;
+    case ISD::FNEARBYINT:  R = SoftenFloatRes_FNEARBYINT(N); break;
+    case ISD::FNEG:        R = SoftenFloatRes_FNEG(N); break;
+    case ISD::FP_EXTEND:   R = SoftenFloatRes_FP_EXTEND(N); break;
+    case ISD::FP_ROUND:    R = SoftenFloatRes_FP_ROUND(N); break;
+    case ISD::FPOW:        R = SoftenFloatRes_FPOW(N); break;
+    case ISD::FPOWI:       R = SoftenFloatRes_FPOWI(N); break;
+    case ISD::FREM:        R = SoftenFloatRes_FREM(N); break;
+    case ISD::FRINT:       R = SoftenFloatRes_FRINT(N); break;
+    case ISD::FSIN:        R = SoftenFloatRes_FSIN(N); break;
+    case ISD::FSQRT:       R = SoftenFloatRes_FSQRT(N); break;
+    case ISD::FSUB:        R = SoftenFloatRes_FSUB(N); break;
+    case ISD::FTRUNC:      R = SoftenFloatRes_FTRUNC(N); break;
+    case ISD::LOAD:        R = SoftenFloatRes_LOAD(N); break;
+    case ISD::SELECT:      R = SoftenFloatRes_SELECT(N); break;
+    case ISD::SELECT_CC:   R = SoftenFloatRes_SELECT_CC(N); break;
+    case ISD::SINT_TO_FP:
+    case ISD::UINT_TO_FP:  R = SoftenFloatRes_XINT_TO_FP(N); break;
+    case ISD::UNDEF:       R = SoftenFloatRes_UNDEF(N); break;
+    case ISD::VAARG:       R = SoftenFloatRes_VAARG(N); break;
+  }
+
+  // If R is null, the sub-method took care of registering the result.
+  if (R.getNode())
+    SetSoftenedFloat(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+  return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+  // Convert the inputs to integers, and build a new pair out of them.
+  return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
+                     TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+                     BitConvertToInteger(N->getOperand(0)),
+                     BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
+  return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+                         TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+                     NewOp.getValueType().getVectorElementType(),
+                     NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned Size = NVT.getSizeInBits();
+
+  // Mask = ~(1 << (Size-1))
+  SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
+                                 NVT);
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
+}
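+
+// For f32 this is the classic integer fabs on the softened bits:
+//   result_bits = op_bits & 0x7FFFFFFF;
+// i.e. the value with only the sign bit cleared.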
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::ADD_F32,
+                                  RTLIB::ADD_F64,
+                                  RTLIB::ADD_F80,
+                                  RTLIB::ADD_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::CEIL_F32,
+                                  RTLIB::CEIL_F64,
+                                  RTLIB::CEIL_F80,
+                                  RTLIB::CEIL_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+  SDValue RHS = BitConvertToInteger(N->getOperand(1));
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT LVT = LHS.getValueType();
+  EVT RVT = RHS.getValueType();
+
+  unsigned LSize = LVT.getSizeInBits();
+  unsigned RSize = RVT.getSizeInBits();
+
+  // First get the sign bit of second operand.
+  SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
+                                  DAG.getConstant(RSize - 1,
+                                                  TLI.getShiftAmountTy()));
+  SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+  // Move the sign bit into the position it has in the first operand's type:
+  // shift right and truncate if the second operand is wider, or extend and
+  // shift left if it is narrower.
+  int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+  if (SizeDiff > 0) {
+    SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+                          DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+    SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+  } else if (SizeDiff < 0) {
+    SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+    SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+                          DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy()));
+  }
+
+  // Clear the sign bit of the first operand.
+  SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
+                               DAG.getConstant(LSize - 1,
+                                               TLI.getShiftAmountTy()));
+  Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+  LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+  // Or the value with the sign bit.
+  return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
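+
+// When both operands are f32, the whole sequence reduces to the usual
+// bit-twiddled copysign on the softened values:
+//   (lhs_bits & 0x7FFFFFFF) | (rhs_bits & 0x80000000)
+// The shift/extend adjustment above only matters when the widths differ.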
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::COS_F32,
+                                  RTLIB::COS_F64,
+                                  RTLIB::COS_F80,
+                                  RTLIB::COS_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::DIV_F32,
+                                  RTLIB::DIV_F64,
+                                  RTLIB::DIV_F80,
+                                  RTLIB::DIV_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::EXP_F32,
+                                  RTLIB::EXP_F64,
+                                  RTLIB::EXP_F80,
+                                  RTLIB::EXP_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::EXP2_F32,
+                                  RTLIB::EXP2_F64,
+                                  RTLIB::EXP2_F80,
+                                  RTLIB::EXP2_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::FLOOR_F32,
+                                  RTLIB::FLOOR_F64,
+                                  RTLIB::FLOOR_F80,
+                                  RTLIB::FLOOR_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::LOG_F32,
+                                  RTLIB::LOG_F64,
+                                  RTLIB::LOG_F80,
+                                  RTLIB::LOG_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::LOG2_F32,
+                                  RTLIB::LOG2_F64,
+                                  RTLIB::LOG2_F80,
+                                  RTLIB::LOG2_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::LOG10_F32,
+                                  RTLIB::LOG10_F64,
+                                  RTLIB::LOG10_F80,
+                                  RTLIB::LOG10_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::MUL_F32,
+                                  RTLIB::MUL_F64,
+                                  RTLIB::MUL_F80,
+                                  RTLIB::MUL_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::NEARBYINT_F32,
+                                  RTLIB::NEARBYINT_F64,
+                                  RTLIB::NEARBYINT_F80,
+                                  RTLIB::NEARBYINT_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+  SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+                     GetSoftenedFloat(N->getOperand(0)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::SUB_F32,
+                                  RTLIB::SUB_F64,
+                                  RTLIB::SUB_F80,
+                                  RTLIB::SUB_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+  return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+  return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::POW_F32,
+                                  RTLIB::POW_F64,
+                                  RTLIB::POW_F80,
+                                  RTLIB::POW_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+  assert(N->getOperand(1).getValueType() == MVT::i32 &&
+         "Unsupported power type!");
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::POWI_F32,
+                                  RTLIB::POWI_F64,
+                                  RTLIB::POWI_F80,
+                                  RTLIB::POWI_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::REM_F32,
+                                  RTLIB::REM_F64,
+                                  RTLIB::REM_F80,
+                                  RTLIB::REM_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::RINT_F32,
+                                  RTLIB::RINT_F64,
+                                  RTLIB::RINT_F80,
+                                  RTLIB::RINT_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::SIN_F32,
+                                  RTLIB::SIN_F64,
+                                  RTLIB::SIN_F80,
+                                  RTLIB::SIN_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::SQRT_F32,
+                                  RTLIB::SQRT_F64,
+                                  RTLIB::SQRT_F80,
+                                  RTLIB::SQRT_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::SUB_F32,
+                                  RTLIB::SUB_F64,
+                                  RTLIB::SUB_F80,
+                                  RTLIB::SUB_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::TRUNC_F32,
+                                  RTLIB::TRUNC_F64,
+                                  RTLIB::TRUNC_F80,
+                                  RTLIB::TRUNC_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+  LoadSDNode *L = cast<LoadSDNode>(N);
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue NewL;
+  if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+    NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(),
+                       NVT, L->getChain(), L->getBasePtr(), L->getOffset(),
+                       L->getSrcValue(), L->getSrcValueOffset(), NVT,
+                       L->isVolatile(), L->getAlignment());
+    // Legalized the chain result - switch anything that used the old chain to
+    // use the new one.
+    ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+    return NewL;
+  }
+
+  // Do a non-extending load followed by FP_EXTEND.
+  NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD,
+                     L->getMemoryVT(), L->getChain(),
+                     L->getBasePtr(), L->getOffset(),
+                     L->getSrcValue(), L->getSrcValueOffset(),
+                     L->getMemoryVT(),
+                     L->isVolatile(), L->getAlignment());
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+  return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+  SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+  SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0),
+                     N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+                                               N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+  SDValue Chain = N->getOperand(0); // Get the chain.
+  SDValue Ptr = N->getOperand(1); // Get the pointer.
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue NewVAARG;
+  NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+  return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+  EVT NVT = EVT();
+  DebugLoc dl = N->getDebugLoc();
+
+  // If the input is not legal, e.g. i1 -> fp, then it needs to be promoted to
+  // a larger type, e.g. i8 -> fp.  Even if it is legal, no libcall may exactly
+  // match.  Look for an appropriate libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+       t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+    NVT = (MVT::SimpleValueType)t;
+    // The source needs to be big enough to hold the operand.
+    if (NVT.bitsGE(SVT))
+      LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT)
+                  : RTLIB::getUINTTOFP(NVT, RVT);
+  }
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+  // Sign/zero extend the argument if the libcall takes a larger type.
+  SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                           NVT, N->getOperand(0));
+  return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+                     &Op, 1, false, dl);
+}
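+
+// For illustration (hypothetical target): softening an i8 -> f32 SINT_TO_FP
+// where i8 is illegal scans the integer types upward, settles on i32 as the
+// first type with a known libcall, sign-extends the operand to i32, and then
+// emits the i32 -> f32 conversion call.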
+
+
+//===----------------------------------------------------------------------===//
+//  Operand Float to Integer Conversion
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to soften this operator's operand!");
+
+  case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+  case ISD::BR_CC:       Res = SoftenFloatOp_BR_CC(N); break;
+  case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
+  case ISD::FP_TO_SINT:  Res = SoftenFloatOp_FP_TO_SINT(N); break;
+  case ISD::FP_TO_UINT:  Res = SoftenFloatOp_FP_TO_UINT(N); break;
+  case ISD::SELECT_CC:   Res = SoftenFloatOp_SELECT_CC(N); break;
+  case ISD::SETCC:       Res = SoftenFloatOp_SETCC(N); break;
+  case ISD::STORE:       Res = SoftenFloatOp_STORE(N, OpNo); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// SoftenSetCCOperands - Soften the operands of a comparison.  This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+                                           ISD::CondCode &CCCode, DebugLoc dl) {
+  SDValue LHSInt = GetSoftenedFloat(NewLHS);
+  SDValue RHSInt = GetSoftenedFloat(NewRHS);
+  EVT VT = NewLHS.getValueType();
+
+  assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
+
+  // Expand into one or more soft-fp libcall(s).
+  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+  switch (CCCode) {
+  case ISD::SETEQ:
+  case ISD::SETOEQ:
+    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+    break;
+  case ISD::SETNE:
+  case ISD::SETUNE:
+    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+    break;
+  case ISD::SETGE:
+  case ISD::SETOGE:
+    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+    break;
+  case ISD::SETLT:
+  case ISD::SETOLT:
+    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+    break;
+  case ISD::SETLE:
+  case ISD::SETOLE:
+    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+    break;
+  case ISD::SETGT:
+  case ISD::SETOGT:
+    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+    break;
+  case ISD::SETUO:
+    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+    break;
+  case ISD::SETO:
+    LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+    break;
+  default:
+    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+    switch (CCCode) {
+    case ISD::SETONE:
+      // SETONE = SETOLT | SETOGT
+      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+      // Fallthrough
+    case ISD::SETUGT:
+      LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+      break;
+    case ISD::SETUGE:
+      LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+      break;
+    case ISD::SETULT:
+      LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+      break;
+    case ISD::SETULE:
+      LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+      break;
+    case ISD::SETUEQ:
+      LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+      break;
+    default: assert(false && "Do not know how to soften this setcc!");
+    }
+  }
+
+  // Use the target-specific return value for comparison libcalls.
+  EVT RetVT = TLI.getCmpLibcallReturnType();
+  SDValue Ops[2] = { LHSInt, RHSInt };
+  NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+  NewRHS = DAG.getConstant(0, RetVT);
+  CCCode = TLI.getCmpLibcallCC(LC1);
+  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+    SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
+                                NewLHS, NewRHS, DAG.getCondCode(CCCode));
+    NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+    NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
+                         NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+    NewRHS = SDValue();
+  }
+}
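+
+// For illustration: a SETUEQ (unordered-or-equal) compare of two f32 values
+// softens into two libcalls, roughly
+//   __unordsf2(a, b) != 0  ||  __eqsf2(a, b) == 0
+// with the OR of the two SETCC results replacing the original compare.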
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
+  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+                     GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                N->getOperand(2), N->getOperand(3),
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only soften the stored value!");
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Val = ST->getValue();
+  DebugLoc dl = N->getDebugLoc();
+
+  if (ST->isTruncatingStore())
+    // Do an FP_ROUND followed by a non-truncating store.
+    Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+                                          Val, DAG.getIntPtrConstant(0)));
+  else
+    Val = GetSoftenedFloat(Val);
+
+  return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+                      ST->getSrcValue(), ST->getSrcValueOffset(),
+                      ST->isVolatile(), ST->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion.  At this point, the node may
+/// also have invalid operands or may have other results that need promotion;
+/// we just know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Lo, Hi;
+  Lo = Hi = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to expand the result of this operator!");
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+  case ISD::BIT_CONVERT:        ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+  case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+  case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::VAARG:              ExpandRes_VAARG(N, Lo, Hi); break;
+
+  case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+  case ISD::FABS:       ExpandFloatRes_FABS(N, Lo, Hi); break;
+  case ISD::FADD:       ExpandFloatRes_FADD(N, Lo, Hi); break;
+  case ISD::FCEIL:      ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+  case ISD::FCOS:       ExpandFloatRes_FCOS(N, Lo, Hi); break;
+  case ISD::FDIV:       ExpandFloatRes_FDIV(N, Lo, Hi); break;
+  case ISD::FEXP:       ExpandFloatRes_FEXP(N, Lo, Hi); break;
+  case ISD::FEXP2:      ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+  case ISD::FFLOOR:     ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+  case ISD::FLOG:       ExpandFloatRes_FLOG(N, Lo, Hi); break;
+  case ISD::FLOG2:      ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+  case ISD::FLOG10:     ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+  case ISD::FMUL:       ExpandFloatRes_FMUL(N, Lo, Hi); break;
+  case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+  case ISD::FNEG:       ExpandFloatRes_FNEG(N, Lo, Hi); break;
+  case ISD::FP_EXTEND:  ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+  case ISD::FPOW:       ExpandFloatRes_FPOW(N, Lo, Hi); break;
+  case ISD::FPOWI:      ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+  case ISD::FRINT:      ExpandFloatRes_FRINT(N, Lo, Hi); break;
+  case ISD::FSIN:       ExpandFloatRes_FSIN(N, Lo, Hi); break;
+  case ISD::FSQRT:      ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+  case ISD::FSUB:       ExpandFloatRes_FSUB(N, Lo, Hi); break;
+  case ISD::FTRUNC:     ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+  case ISD::LOAD:       ExpandFloatRes_LOAD(N, Lo, Hi); break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
+  if (Lo.getNode())
+    SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  assert(NVT.getSizeInBits() == integerPartWidth &&
+         "Do not know how to expand this float constant!");
+  APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+  Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+                                       &C.getRawData()[1])), NVT);
+  Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+                                       &C.getRawData()[0])), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  assert(N->getValueType(0) == MVT::ppcf128 &&
+         "Logic only correct for ppcf128!");
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Tmp;
+  GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+  Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
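+  // A ppcf128 value is the sum Hi + Lo; if taking the absolute value flipped
+  // the sign of Hi, Lo must be negated as well.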
+  // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+  Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo,
+                   DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+                   DAG.getCondCode(ISD::SETEQ));
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::ADD_F32, RTLIB::ADD_F64,
+                                         RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+                                         RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::COS_F32, RTLIB::COS_F64,
+                                         RTLIB::COS_F80, RTLIB::COS_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                          RTLIB::DIV_F32,
+                                          RTLIB::DIV_F64,
+                                          RTLIB::DIV_F80,
+                                          RTLIB::DIV_PPCF128),
+                             N->getValueType(0), Ops, 2, false,
+                             N->getDebugLoc());
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::EXP_F32, RTLIB::EXP_F64,
+                                         RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+                                         RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::FLOOR_F32,RTLIB::FLOOR_F64,
+                                         RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::LOG_F32, RTLIB::LOG_F64,
+                                         RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+                                         RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::LOG10_F32,RTLIB::LOG10_F64,
+                                         RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                          RTLIB::MUL_F32,
+                                          RTLIB::MUL_F64,
+                                          RTLIB::MUL_F80,
+                                          RTLIB::MUL_PPCF128),
+                             N->getValueType(0), Ops, 2, false,
+                             N->getDebugLoc());
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+                                                 SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::NEARBYINT_F32,
+                                         RTLIB::NEARBYINT_F64,
+                                         RTLIB::NEARBYINT_F80,
+                                         RTLIB::NEARBYINT_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedFloat(N->getOperand(0), Lo, Hi);
+  Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+  Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+                                                SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
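+  // Extending to ppcf128 puts the value entirely in the high double; the low
+  // double is zero.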
+  Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+  Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::POW_F32, RTLIB::POW_F64,
+                                         RTLIB::POW_F80, RTLIB::POW_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::POWI_F32, RTLIB::POWI_F64,
+                                         RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::RINT_F32, RTLIB::RINT_F64,
+                                         RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::SIN_F32, RTLIB::SIN_F64,
+                                         RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+                                         RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                          RTLIB::SUB_F32,
+                                          RTLIB::SUB_F64,
+                                          RTLIB::SUB_F80,
+                                          RTLIB::SUB_PPCF128),
+                             N->getValueType(0), Ops, 2, false,
+                             N->getDebugLoc());
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+                                         RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  if (ISD::isNormalLoad(N)) {
+    ExpandRes_NormalLoad(N, Lo, Hi);
+    return;
+  }
+
+  assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  SDValue Chain = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+  assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+  Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+                      LD->getSrcValue(), LD->getSrcValueOffset(),
+                      LD->getMemoryVT(),
+                      LD->isVolatile(), LD->getAlignment());
+
+  // Remember the chain.
+  Chain = Hi.getValue(1);
+
+  // The low part is zero.
+  Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+
+  // Modified the chain - switch anything that used the old chain to use the
+  // new one.
+  ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Src = N->getOperand(0);
+  EVT SrcVT = Src.getValueType();
+  bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+  DebugLoc dl = N->getDebugLoc();
+
+  // First do an SINT_TO_FP, whether the original was signed or unsigned.
+  // When promoting partial word types to i32 we must honor the signedness,
+  // though.
+  if (SrcVT.bitsLE(MVT::i32)) {
+    // The integer can be represented exactly in an f64.
+    Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                      MVT::i32, Src);
+    Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+    Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+  } else {
+    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+    if (SrcVT.bitsLE(MVT::i64)) {
+      Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                        MVT::i64, Src);
+      LC = RTLIB::SINTTOFP_I64_PPCF128;
+    } else if (SrcVT.bitsLE(MVT::i128)) {
+      Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+      LC = RTLIB::SINTTOFP_I128_PPCF128;
+    }
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+    Hi = MakeLibCall(LC, VT, &Src, 1, true, dl);
+    GetPairElements(Hi, Lo, Hi);
+  }
+
+  if (isSigned)
+    return;
+
+  // Unsigned - fix up the SINT_TO_FP value just calculated.
+  Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+  SrcVT = Src.getValueType();
+
+  // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
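+  // Each constant below is the IEEE double bit pattern of 2^N (exponent
+  // 0x41f = 1023+32, 0x43f = 1023+64, 0x47f = 1023+128) in the high double
+  // of a ppcf128 pair; the low double is zero.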
+  static const uint64_t TwoE32[]  = { 0x41f0000000000000LL, 0 };
+  static const uint64_t TwoE64[]  = { 0x43f0000000000000LL, 0 };
+  static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+  const uint64_t *Parts = 0;
+
+  switch (SrcVT.getSimpleVT().SimpleTy) {
+  default:
+    assert(false && "Unsupported UINT_TO_FP!");
+  case MVT::i32:
+    Parts = TwoE32;
+    break;
+  case MVT::i64:
+    Parts = TwoE64;
+    break;
+  case MVT::i128:
+    Parts = TwoE128;
+    break;
+  }
+
+  Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+                   DAG.getConstantFP(APFloat(APInt(128, 2, Parts)),
+                                     MVT::ppcf128));
+  Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
+                   Lo, Hi, DAG.getCondCode(ISD::SETLT));
+  GetPairElements(Lo, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion.  At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
+      == TargetLowering::Custom)
+    Res = TLI.LowerOperation(SDValue(N, 0), DAG);
+
+  if (Res.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default:
+  #ifndef NDEBUG
+      dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+      N->dump(&DAG); dbgs() << "\n";
+  #endif
+      llvm_unreachable("Do not know how to expand this operator's operand!");
+
+    case ISD::BIT_CONVERT:     Res = ExpandOp_BIT_CONVERT(N); break;
+    case ISD::BUILD_VECTOR:    Res = ExpandOp_BUILD_VECTOR(N); break;
+    case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+    case ISD::BR_CC:      Res = ExpandFloatOp_BR_CC(N); break;
+    case ISD::FP_ROUND:   Res = ExpandFloatOp_FP_ROUND(N); break;
+    case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+    case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+    case ISD::SELECT_CC:  Res = ExpandFloatOp_SELECT_CC(N); break;
+    case ISD::SETCC:      Res = ExpandFloatOp_SETCC(N); break;
+    case ISD::STORE:      Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+                                                    OpNo); break;
+    }
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison.  This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+                                                SDValue &NewRHS,
+                                                ISD::CondCode &CCCode,
+                                                DebugLoc dl) {
+  SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+  GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+  GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+  EVT VT = NewLHS.getValueType();
+  assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+
+  // FIXME:  This generated code sucks.  We want to generate
+  //         FCMPU crN, hi1, hi2
+  //         BNE crN, L:
+  //         FCMPU crN, lo1, lo2
+  // The following can be improved, but not that much.
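+  // Compute (Hi1 == Hi2 && Lo1 cc Lo2) || (Hi1 != Hi2 && Hi1 cc Hi2): the
+  // low parts only decide the compare when the high parts are equal.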
+  SDValue Tmp1, Tmp2, Tmp3;
+  Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                      LHSHi, RHSHi, ISD::SETOEQ);
+  Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+                      LHSLo, RHSLo, CCCode);
+  Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+  Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                      LHSHi, RHSHi, ISD::SETUNE);
+  Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                      LHSHi, RHSHi, CCCode);
+  Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+  NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+  NewRHS = SDValue();   // LHS is the result, not a compare.
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If FloatExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+  assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+         "Logic only correct for ppcf128!");
+  SDValue Lo, Hi;
+  GetExpandedFloat(N->getOperand(0), Lo, Hi);
+  // Round it the rest of the way (e.g. to f32) if needed.
+  return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+                     N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+  // PPC (the libcall is not available).  FIXME: Do this in a less hacky way.
+  if (RVT == MVT::i32) {
+    assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+           "Logic only correct for ppcf128!");
+    SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+                              N->getOperand(0), DAG.getValueType(MVT::f64));
+    Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+                      DAG.getIntPtrConstant(1));
+    return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+  }
+
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+  return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+  // PPC (the libcall is not available).  FIXME: Do this in a less hacky way.
+  if (RVT == MVT::i32) {
+    assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+           "Logic only correct for ppcf128!");
+    const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+    APFloat APF = APFloat(APInt(128, 2, TwoE31));
+    SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+    //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+    // FIXME: generated code sucks.
+    return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+                       DAG.getNode(ISD::ADD, dl, MVT::i32,
+                                   DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+                                               DAG.getNode(ISD::FSUB, dl,
+                                                           MVT::ppcf128,
+                                                           N->getOperand(0),
+                                                           Tmp)),
+                                   DAG.getConstant(0x80000000, MVT::i32)),
+                       DAG.getNode(ISD::FP_TO_SINT, dl,
+                                   MVT::i32, N->getOperand(0)),
+                       DAG.getCondCode(ISD::SETGE));
+  }
+
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+  return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If FloatExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                N->getOperand(2), N->getOperand(3),
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If FloatExpandSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+
+  SDValue Chain = ST->getChain();
+  SDValue Ptr = ST->getBasePtr();
+
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                     ST->getValue().getValueType());
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+  assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+  SDValue Lo, Hi;
+  GetExpandedOp(ST->getValue(), Lo, Hi);
+
+  return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+                           ST->getSrcValue(), ST->getSrcValueOffset(),
+                           ST->getMemoryVT(),
+                           ST->isVolatile(), ST->getAlignment());
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 0000000..9932cf4
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,2353 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type.  For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type.  For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
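+//
+// For example (illustrative): promoting an i8 add computes the sum in an i32
+// register and truncates when the value is used, while expanding an i64 add
+// on a 32-bit target becomes an i32 ADDC of the low halves followed by an
+// i32 ADDE (add with carry) of the high halves.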
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type.  At this point, the
+/// node may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to promote this operator!");
+  case ISD::AssertSext:  Res = PromoteIntRes_AssertSext(N); break;
+  case ISD::AssertZext:  Res = PromoteIntRes_AssertZext(N); break;
+  case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+  case ISD::BSWAP:       Res = PromoteIntRes_BSWAP(N); break;
+  case ISD::BUILD_PAIR:  Res = PromoteIntRes_BUILD_PAIR(N); break;
+  case ISD::Constant:    Res = PromoteIntRes_Constant(N); break;
+  case ISD::CONVERT_RNDSAT:
+                         Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+  case ISD::CTLZ:        Res = PromoteIntRes_CTLZ(N); break;
+  case ISD::CTPOP:       Res = PromoteIntRes_CTPOP(N); break;
+  case ISD::CTTZ:        Res = PromoteIntRes_CTTZ(N); break;
+  case ISD::EXTRACT_VECTOR_ELT:
+                         Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::LOAD:        Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+  case ISD::SELECT:      Res = PromoteIntRes_SELECT(N); break;
+  case ISD::SELECT_CC:   Res = PromoteIntRes_SELECT_CC(N); break;
+  case ISD::SETCC:       Res = PromoteIntRes_SETCC(N); break;
+  case ISD::SHL:         Res = PromoteIntRes_SHL(N); break;
+  case ISD::SIGN_EXTEND_INREG:
+                         Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+  case ISD::SRA:         Res = PromoteIntRes_SRA(N); break;
+  case ISD::SRL:         Res = PromoteIntRes_SRL(N); break;
+  case ISD::TRUNCATE:    Res = PromoteIntRes_TRUNCATE(N); break;
+  case ISD::UNDEF:       Res = PromoteIntRes_UNDEF(N); break;
+  case ISD::VAARG:       Res = PromoteIntRes_VAARG(N); break;
+
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:  Res = PromoteIntRes_INT_EXTEND(N); break;
+
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:  Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:         Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+  case ISD::SDIV:
+  case ISD::SREM:        Res = PromoteIntRes_SDIV(N); break;
+
+  case ISD::UDIV:
+  case ISD::UREM:        Res = PromoteIntRes_UDIV(N); break;
+
+  case ISD::SADDO:
+  case ISD::SSUBO:       Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+  case ISD::UADDO:
+  case ISD::USUBO:       Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+  case ISD::SMULO:
+  case ISD::UMULO:       Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_SWAP:
+    Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+  case ISD::ATOMIC_CMP_SWAP:
+    Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+  }
+
+  // If the result is null then the sub-method took care of registering it.
+  if (Res.getNode())
+    SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+  // Sign-extend the new bits, and continue the assertion.
+  SDValue Op = SExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::AssertSext, N->getDebugLoc(),
+                     Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+  // Zero the new bits, and continue the assertion.
+  SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::AssertZext, N->getDebugLoc(),
+                     Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+  SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+  SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+                              N->getMemoryVT(),
+                              N->getChain(), N->getBasePtr(),
+                              Op2, N->getSrcValue(), N->getAlignment());
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+  SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+  SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+  SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+                              N->getMemoryVT(), N->getChain(), N->getBasePtr(),
+                              Op2, Op3, N->getSrcValue(), N->getAlignment());
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  EVT InVT = InOp.getValueType();
+  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  DebugLoc dl = N->getDebugLoc();
+
+  switch (getTypeAction(InVT)) {
+  default:
+    assert(false && "Unknown type action!");
+    break;
+  case Legal:
+    break;
+  case PromoteInteger:
+    if (NOutVT.bitsEq(NInVT))
+      // The input promotes to the same size.  Convert the promoted value.
+      return DAG.getNode(ISD::BIT_CONVERT, dl,
+                         NOutVT, GetPromotedInteger(InOp));
+    break;
+  case SoftenFloat:
+    // Promote the integer operand by hand.
+    return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+  case ExpandInteger:
+  case ExpandFloat:
+    break;
+  case ScalarizeVector:
+    // Convert the element to an integer and promote it by hand.
+    return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+                       BitConvertToInteger(GetScalarizedVector(InOp)));
+  case SplitVector: {
+    // For example, i32 = BIT_CONVERT v2i16 on Alpha.  Convert the split
+    // pieces of the input into integers and reassemble in the final type.
+    SDValue Lo, Hi;
+    GetSplitVector(N->getOperand(0), Lo, Hi);
+    Lo = BitConvertToInteger(Lo);
+    Hi = BitConvertToInteger(Hi);
+
+    if (TLI.isBigEndian())
+      std::swap(Lo, Hi);
+
+    InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+                       EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()),
+                       JoinIntegers(Lo, Hi));
+    return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+  }
+  case WidenVector:
+    if (OutVT.bitsEq(NInVT))
+      // The input is widened to the same size.  Convert to the widened value.
+      return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+  }
+
+  return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+                     CreateStackStoreLoad(InOp, OutVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  EVT OVT = N->getValueType(0);
+  EVT NVT = Op.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
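+  // A rough example, assuming i16 promoted to i32: the i16 value 0xAABB sits
+  // in the low half of the promoted register, so the i32 BSWAP below yields
+  // 0xBBAA in the high half; the SRL by DiffBits (16) then shifts any junk
+  // bytes out and leaves 0x0000BBAA, the correct i16 bswap.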
+  unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+  return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+                     DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+  // The pair element type may be legal, or may not promote to the same type as
+  // the result, for example i14 = BUILD_PAIR (i7, i7).  Handle all cases.
+  return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+                     TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+                     JoinIntegers(N->getOperand(0), N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  // FIXME there is no actual debug info here
+  DebugLoc dl = N->getDebugLoc();
+  // Zero extend things like i1, sign extend everything else.  In theory it
+  // shouldn't matter which one we pick, but in practice this tends to give
+  // better code.
+  unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+  SDValue Result = DAG.getNode(Opc, dl,
+                               TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+                               SDValue(N, 0));
+  assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+  assert((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+          CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+          CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+         "can only promote integers");
+  EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+                              N->getOperand(1), N->getOperand(2),
+                              N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+  // Zero extend to the promoted type and do the count there.
+  SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+  DebugLoc dl = N->getDebugLoc();
+  EVT OVT = N->getValueType(0);
+  EVT NVT = Op.getValueType();
+  Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+  // Subtract off the extra leading bits in the bigger type.
+  return DAG.getNode(ISD::SUB, dl, NVT, Op,
+                     DAG.getConstant(NVT.getSizeInBits() -
+                                     OVT.getSizeInBits(), NVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+  // Zero extend to the promoted type and do the count there.
+  SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  EVT OVT = N->getValueType(0);
+  EVT NVT = Op.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+  // The count is the same in the promoted type except if the original
+  // value was zero.  This can be handled by setting the bit just off
+  // the top of the original type.
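+  // E.g. for i8 promoted to i32, OR in bit 8: cttz of a zero i8 value then
+  // returns 8 (the guard bit) rather than 32, matching cttz on the original
+  // type, while nonzero values are unaffected in their low 8 bits.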
+  APInt TopBit(NVT.getSizeInBits(), 0);
+  TopBit.set(OVT.getSizeInBits());
+  Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+  return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
+                     N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned NewOpc = N->getOpcode();
+  DebugLoc dl = N->getDebugLoc();
+
+  // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+  // not Legal, check to see if we can use FP_TO_SINT instead.  (If both UINT
+  // and SINT conversions are Custom, there is no way to tell which is
+  // preferable.  We choose SINT because that's the right thing on PPC.)
+  if (N->getOpcode() == ISD::FP_TO_UINT &&
+      !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+      TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+    NewOpc = ISD::FP_TO_SINT;
+
+  SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+  // Assert that the converted value fits in the original type.  If it doesn't
+  // (e.g. because the value being converted is too big), then the result of the
+  // original operation was undefined anyway, so the assert is still correct.
+  return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+                     ISD::AssertZext : ISD::AssertSext, dl,
+                     NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+
+  if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) {
+    SDValue Res = GetPromotedInteger(N->getOperand(0));
+    assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+    // If the result and operand types are the same after promotion, simplify
+    // to an in-register extension.
+    if (NVT == Res.getValueType()) {
+      // The high bits are not guaranteed to be anything.  Insert an extend.
+      if (N->getOpcode() == ISD::SIGN_EXTEND)
+        return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+                           DAG.getValueType(N->getOperand(0).getValueType()));
+      if (N->getOpcode() == ISD::ZERO_EXTEND)
+        return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+      assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+      return Res;
+    }
+  }
+
+  // Otherwise, just extend the original operand all the way to the larger type.
+  return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+  assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  ISD::LoadExtType ExtType =
+    ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+                               N->getSrcValue(), N->getSrcValueOffset(),
+                               N->getMemoryVT(), N->isVolatile(),
+                               N->getAlignment());
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+  // Simply change the return type of the boolean result.
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+  EVT ValueVTs[] = { N->getValueType(0), NVT };
+  SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                            DAG.getVTList(ValueVTs, 2), Ops, 2);
+
+  // Modified the sum result - switch anything that used the old sum to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 0), Res);
+
+  return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+  if (ResNo == 1)
+    return PromoteIntRes_Overflow(N);
+
+  // The operation overflowed iff the result in the larger type is not the
+  // sign extension of its truncation to the original type.
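+  // E.g. for i8 promoted to i32: 100 + 100 computes 200 in the wide type,
+  // but sign-extending the low 8 bits of 200 gives -56 != 200, so the i8
+  // addition overflowed.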
+  SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+  EVT OVT = N->getOperand(0).getValueType();
+  EVT NVT = LHS.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Do the arithmetic in the larger type.
+  unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+  SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+  // Calculate the overflow flag: sign extend the arithmetic result from
+  // the original type.
+  SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+                            DAG.getValueType(OVT));
+  // Overflowed if and only if this is not equal to Res.
+  Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+  // Use the calculated overflow everywhere.
+  ReplaceValueWith(SDValue(N, 1), Ofl);
+
+  return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
+  // Sign extend the input.
+  SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+  SDValue LHS = GetPromotedInteger(N->getOperand(1));
+  SDValue RHS = GetPromotedInteger(N->getOperand(2));
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+  SDValue LHS = GetPromotedInteger(N->getOperand(2));
+  SDValue RHS = GetPromotedInteger(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0),
+                     N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+  EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+  assert(isTypeLegal(SVT) && "Illegal SetCC type!");
+  DebugLoc dl = N->getDebugLoc();
+
+  // Get the SETCC result using the canonical SETCC type.
+  SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0),
+                              N->getOperand(1), N->getOperand(2));
+
+  // Convert to the expected type.
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+  return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+  return DAG.getNode(ISD::SHL, N->getDebugLoc(),
+                     TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+                     GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+                     Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+  // The input may have strange things in the top bits of the registers, but
+  // these operations don't care.  They may have weird bits going out, but
+  // that too is okay because these are integer operations.
+  SDValue LHS = GetPromotedInteger(N->getOperand(0));
+  SDValue RHS = GetPromotedInteger(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+  // The input value must be properly sign extended.
+  SDValue Res = SExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::SRA, N->getDebugLoc(),
+                     Res.getValueType(), Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+  // The input value must be properly zero extended.
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Res = ZExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Res;
+
+  switch (getTypeAction(N->getOperand(0).getValueType())) {
+  default: llvm_unreachable("Unknown type action!");
+  case Legal:
+  case ExpandInteger:
+    Res = N->getOperand(0);
+    break;
+  case PromoteInteger:
+    Res = GetPromotedInteger(N->getOperand(0));
+    break;
+  }
+
+  // Truncate to NVT instead of VT.
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+  if (ResNo == 1)
+    return PromoteIntRes_Overflow(N);
+
+  // The operation overflowed iff the result in the larger type is not the
+  // zero extension of its truncation to the original type.
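+  // E.g. for i8 promoted to i32: 200 + 100 computes 300 in the wide type,
+  // but zero-extending the low 8 bits of 300 gives 44 != 300, so the i8
+  // addition overflowed.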
+  SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+  EVT OVT = N->getOperand(0).getValueType();
+  EVT NVT = LHS.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Do the arithmetic in the larger type.
+  unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+  SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+  // Calculate the overflow flag: zero extend the arithmetic result from
+  // the original type.
+  SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+  // Overflowed if and only if this is not equal to Res.
+  Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+  // Use the calculated overflow everywhere.
+  ReplaceValueWith(SDValue(N, 1), Ofl);
+
+  return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+  // Zero extend the input.
+  SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+                                               N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+  SDValue Chain = N->getOperand(0); // Get the chain.
+  SDValue Ptr = N->getOperand(1); // Get the pointer.
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+  unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+  // The argument is passed as NumRegs registers of type RegVT.
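+  // E.g. for an i64 va_arg on a typical 32-bit target, RegVT is i32 and
+  // NumRegs is 2, so two i32 va_args are loaded and recombined below.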
+
+  SmallVector<SDValue, 8> Parts(NumRegs);
+  for (unsigned i = 0; i < NumRegs; ++i) {
+    Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2));
+    Chain = Parts[i].getValue(1);
+  }
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::reverse(Parts.begin(), Parts.end());
+
+  // Assemble the parts in the promoted type.
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+  for (unsigned i = 1; i < NumRegs; ++i) {
+    SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+    // Shift it to the right position and "or" it in.
+    Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+                       DAG.getConstant(i * RegVT.getSizeInBits(),
+                                       TLI.getPointerTy()));
+    Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+  }
+
+  // Modified the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Chain);
+
+  return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+  assert(ResNo == 1 && "Only boolean result promotion currently supported!");
+  return PromoteIntRes_Overflow(N);
+}
+
+//===----------------------------------------------------------------------===//
+//  Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion.  At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to promote this operator's operand!");
+
+  case ISD::ANY_EXTEND:   Res = PromoteIntOp_ANY_EXTEND(N); break;
+  case ISD::BIT_CONVERT:  Res = PromoteIntOp_BIT_CONVERT(N); break;
+  case ISD::BR_CC:        Res = PromoteIntOp_BR_CC(N, OpNo); break;
+  case ISD::BRCOND:       Res = PromoteIntOp_BRCOND(N, OpNo); break;
+  case ISD::BUILD_PAIR:   Res = PromoteIntOp_BUILD_PAIR(N); break;
+  case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+  case ISD::CONVERT_RNDSAT:
+                          Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+  case ISD::INSERT_VECTOR_ELT:
+                          Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo); break;
+  case ISD::MEMBARRIER:   Res = PromoteIntOp_MEMBARRIER(N); break;
+  case ISD::SCALAR_TO_VECTOR:
+                          Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+  case ISD::SELECT:       Res = PromoteIntOp_SELECT(N, OpNo); break;
+  case ISD::SELECT_CC:    Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+  case ISD::SETCC:        Res = PromoteIntOp_SETCC(N, OpNo); break;
+  case ISD::SIGN_EXTEND:  Res = PromoteIntOp_SIGN_EXTEND(N); break;
+  case ISD::SINT_TO_FP:   Res = PromoteIntOp_SINT_TO_FP(N); break;
+  case ISD::STORE:        Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+                                                   OpNo); break;
+  case ISD::TRUNCATE:     Res = PromoteIntOp_TRUNCATE(N); break;
+  case ISD::UINT_TO_FP:   Res = PromoteIntOp_UINT_TO_FP(N); break;
+  case ISD::ZERO_EXTEND:  Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison.  This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+                                            ISD::CondCode CCCode) {
+  // We have to insert explicit sign or zero extends.  Note that we could
+  // insert sign extends for ALL conditions, but zero extend is cheaper on
+  // many machines (an AND instead of two shifts), so prefer it.
+  switch (CCCode) {
+  default: llvm_unreachable("Unknown integer comparison!");
+  case ISD::SETEQ:
+  case ISD::SETNE:
+  case ISD::SETUGE:
+  case ISD::SETUGT:
+  case ISD::SETULE:
+  case ISD::SETULT:
+    // ALL of these operations will work if we either sign or zero extend
+    // the operands (including the unsigned comparisons!).  Zero extend is
+    // usually a simpler/cheaper operation, so prefer it.
+    NewLHS = ZExtPromotedInteger(NewLHS);
+    NewRHS = ZExtPromotedInteger(NewRHS);
+    break;
+  case ISD::SETGE:
+  case ISD::SETGT:
+  case ISD::SETLT:
+  case ISD::SETLE:
+    NewLHS = SExtPromotedInteger(NewLHS);
+    NewRHS = SExtPromotedInteger(NewRHS);
+    break;
+  }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) {
+  // This should only occur in unusual situations like bitcasting to an
+  // x86_fp80, so just turn it into a store+load.
+  return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(2);
+  SDValue RHS = N->getOperand(3);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+  // The chain (Op#0), CC (#1) and basic block destination (Op#4) always have
+  // legal types.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "only know how to promote condition");
+
+  // Promote all the way up to the canonical SetCC type.
+  EVT SVT = TLI.getSetCCResultType(MVT::Other);
+  SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+
+  // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond,
+                                N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+  // Since the result type is legal, the operands must promote to it.
+  EVT OVT = N->getOperand(0).getValueType();
+  SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+  SDValue Hi = GetPromotedInteger(N->getOperand(1));
+  assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+  DebugLoc dl = N->getDebugLoc();
+
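+  // E.g. i16 = BUILD_PAIR (i8 Lo, i8 Hi), with i8 promoting to i16, becomes
+  // (zext Lo) | (Hi << 8); the shift discards any junk in the high bits of
+  // the promoted Hi.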
+  Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+                   DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+  return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+  // The vector type is legal but the element type is not.  This implies
+  // that the vector is a power-of-two in length and that the element
+  // type does not have a strange size (e.g. it is not i1).
+  EVT VecVT = N->getValueType(0);
+  unsigned NumElts = VecVT.getVectorNumElements();
+  assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+
+  // Promote the element values.  Their type does not need to match the
+  // vector element type.  Check that any extra bits introduced will be
+  // truncated away.
+  assert(N->getOperand(0).getValueType().getSizeInBits() >=
+         N->getValueType(0).getVectorElementType().getSizeInBits() &&
+         "Type of inserted value narrower than vector element type!");
+
+  SmallVector<SDValue, 16> NewOps;
+  for (unsigned i = 0; i < NumElts; ++i)
+    NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+  return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+  assert((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+          CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+          CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+         "can only promote integer arguments");
+  SDValue InOp = GetPromotedInteger(N->getOperand(0));
+  return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+                              N->getOperand(1), N->getOperand(2),
+                              N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+                                                         unsigned OpNo) {
+  if (OpNo == 1) {
+    // Promote the inserted value.  This is valid because the type does not
+    // have to match the vector element type.
+
+    // Check that any extra bits introduced will be truncated away.
+    assert(N->getOperand(1).getValueType().getSizeInBits() >=
+           N->getValueType(0).getVectorElementType().getSizeInBits() &&
+           "Type of inserted value narrower than vector element type!");
+    return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                  GetPromotedInteger(N->getOperand(1)),
+                                  N->getOperand(2));
+  }
+
+  assert(OpNo == 2 && "Different operand and result vector types?");
+
+  // Promote the index.
+  SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                N->getOperand(1), Idx);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+  SDValue NewOps[6];
+  DebugLoc dl = N->getDebugLoc();
+  NewOps[0] = N->getOperand(0);
+  for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+    SDValue Flag = GetPromotedInteger(N->getOperand(i));
+    NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+  }
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewOps,
+                                array_lengthof(NewOps));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+  // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+  // the operand in place.
+  return DAG.UpdateNodeOperands(SDValue(N, 0),
+                                GetPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Only know how to promote condition");
+
+  // Promote all the way up to the canonical SetCC type.
+  EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+  SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+
+  return DAG.UpdateNodeOperands(SDValue(N, 0), Cond,
+                                N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+  // The CC (#4) and the possible return values (#2 and #3) have legal types.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2),
+                                N->getOperand(3), N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+  // The CC (#2) is always legal.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                ZExtPromotedInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  DebugLoc dl = N->getDebugLoc();
+  Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+                     Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+  return DAG.UpdateNodeOperands(SDValue(N, 0),
+                                SExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+  int SVOffset = N->getSrcValueOffset();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue Val = GetPromotedInteger(N->getValue());  // Get promoted value.
+
+  // Truncate the value and store the result.
+  return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
+                           SVOffset, N->getMemoryVT(),
+                           isVolatile, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+  return DAG.UpdateNodeOperands(SDValue(N, 0),
+                                ZExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+  return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion.  At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Lo, Hi;
+  Lo = Hi = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to expand the result of this operator!");
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+
+  case ISD::BIT_CONVERT:        ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+  case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+  case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::VAARG:              ExpandRes_VAARG(N, Lo, Hi); break;
+
+  case ISD::ANY_EXTEND:  ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+  case ISD::AssertSext:  ExpandIntRes_AssertSext(N, Lo, Hi); break;
+  case ISD::AssertZext:  ExpandIntRes_AssertZext(N, Lo, Hi); break;
+  case ISD::BSWAP:       ExpandIntRes_BSWAP(N, Lo, Hi); break;
+  case ISD::Constant:    ExpandIntRes_Constant(N, Lo, Hi); break;
+  case ISD::CTLZ:        ExpandIntRes_CTLZ(N, Lo, Hi); break;
+  case ISD::CTPOP:       ExpandIntRes_CTPOP(N, Lo, Hi); break;
+  case ISD::CTTZ:        ExpandIntRes_CTTZ(N, Lo, Hi); break;
+  case ISD::FP_TO_SINT:  ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+  case ISD::FP_TO_UINT:  ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+  case ISD::LOAD:        ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+  case ISD::MUL:         ExpandIntRes_MUL(N, Lo, Hi); break;
+  case ISD::SDIV:        ExpandIntRes_SDIV(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+  case ISD::SREM:        ExpandIntRes_SREM(N, Lo, Hi); break;
+  case ISD::TRUNCATE:    ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+  case ISD::UDIV:        ExpandIntRes_UDIV(N, Lo, Hi); break;
+  case ISD::UREM:        ExpandIntRes_UREM(N, Lo, Hi); break;
+  case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+  case ISD::ADD:
+  case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+  case ISD::ADDC:
+  case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+  case ISD::ADDE:
+  case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
+  if (Lo.getNode())
+    SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'.  Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
+                                             SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // Expand the incoming operand to be shifted, so that we have its parts.
+  SDValue InL, InH;
+  GetExpandedInteger(N->getOperand(0), InL, InH);
+
+  EVT NVT = InL.getValueType();
+  unsigned VTBits = N->getValueType(0).getSizeInBits();
+  unsigned NVTBits = NVT.getSizeInBits();
+  EVT ShTy = N->getOperand(1).getValueType();
+
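+  // E.g. assuming NVTBits == 32, a 64-bit SHL by a constant Amt < 32 is
+  // built below as Lo = InL << Amt and Hi = (InH << Amt) | (InL >> (32-Amt)).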
+  if (N->getOpcode() == ISD::SHL) {
+    if (Amt > VTBits) {
+      Lo = Hi = DAG.getConstant(0, NVT);
+    } else if (Amt > NVTBits) {
+      Lo = DAG.getConstant(0, NVT);
+      Hi = DAG.getNode(ISD::SHL, dl,
+                       NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy));
+    } else if (Amt == NVTBits) {
+      Lo = DAG.getConstant(0, NVT);
+      Hi = InL;
+    } else if (Amt == 1 &&
+               TLI.isOperationLegalOrCustom(ISD::ADDC,
+                                            TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
+      // Emit this X << 1 as X+X.
+      SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+      SDValue LoOps[2] = { InL, InL };
+      Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+      SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
+      Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+    } else {
+      Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy));
+      Hi = DAG.getNode(ISD::OR, dl, NVT,
+                       DAG.getNode(ISD::SHL, dl, NVT, InH,
+                                   DAG.getConstant(Amt, ShTy)),
+                       DAG.getNode(ISD::SRL, dl, NVT, InL,
+                                   DAG.getConstant(NVTBits-Amt, ShTy)));
+    }
+    return;
+  }
+
+  if (N->getOpcode() == ISD::SRL) {
+    if (Amt > VTBits) {
+      Lo = DAG.getConstant(0, NVT);
+      Hi = DAG.getConstant(0, NVT);
+    } else if (Amt > NVTBits) {
+      Lo = DAG.getNode(ISD::SRL, dl,
+                       NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
+      Hi = DAG.getConstant(0, NVT);
+    } else if (Amt == NVTBits) {
+      Lo = InH;
+      Hi = DAG.getConstant(0, NVT);
+    } else {
+      Lo = DAG.getNode(ISD::OR, dl, NVT,
+                       DAG.getNode(ISD::SRL, dl, NVT, InL,
+                                   DAG.getConstant(Amt, ShTy)),
+                       DAG.getNode(ISD::SHL, dl, NVT, InH,
+                                   DAG.getConstant(NVTBits-Amt, ShTy)));
+      Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+    }
+    return;
+  }
+
+  assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+  if (Amt > VTBits) {
+    Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+                          DAG.getConstant(NVTBits-1, ShTy));
+  } else if (Amt > NVTBits) {
+    Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+                     DAG.getConstant(Amt-NVTBits, ShTy));
+    Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+                     DAG.getConstant(NVTBits-1, ShTy));
+  } else if (Amt == NVTBits) {
+    Lo = InH;
+    Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+                     DAG.getConstant(NVTBits-1, ShTy));
+  } else {
+    Lo = DAG.getNode(ISD::OR, dl, NVT,
+                     DAG.getNode(ISD::SRL, dl, NVT, InL,
+                                 DAG.getConstant(Amt, ShTy)),
+                     DAG.getNode(ISD::SHL, dl, NVT, InH,
+                                 DAG.getConstant(NVTBits-Amt, ShTy)));
+    Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+  }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bit of the shift amount.  If we
+/// can tell this, we know that it is >= 32 or < 32, without knowing the actual
+/// shift amount.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  SDValue Amt = N->getOperand(1);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT ShTy = Amt.getValueType();
+  unsigned ShBits = ShTy.getSizeInBits();
+  unsigned NVTBits = NVT.getSizeInBits();
+  assert(isPowerOf2_32(NVTBits) &&
+         "Expanded integer type size not a power of two!");
+  DebugLoc dl = N->getDebugLoc();
+
+  APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
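+  // E.g. with NVTBits == 32 and an i8 shift amount type, HighBitMask covers
+  // bits 5-7: if any of them is known one the amount is >= 32, and if all
+  // are known zero it is < 32.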
+  APInt KnownZero, KnownOne;
+  DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+
+  // If we don't know anything about the high bits, exit.
+  if (((KnownZero|KnownOne) & HighBitMask) == 0)
+    return false;
+
+  // Get the incoming operand to be shifted.
+  SDValue InL, InH;
+  GetExpandedInteger(N->getOperand(0), InL, InH);
+
+  // If we know that any of the high bits of the shift amount are one, then we
+  // can do this as a couple of simple shifts.
+  if (KnownOne.intersects(HighBitMask)) {
+    // Mask out the high bit, which we know is set.
+    Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+                      DAG.getConstant(~HighBitMask, ShTy));
+
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unknown shift");
+    case ISD::SHL:
+      Lo = DAG.getConstant(0, NVT);              // Low part is zero.
+      Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+      return true;
+    case ISD::SRL:
+      Hi = DAG.getConstant(0, NVT);              // Hi part is zero.
+      Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+      return true;
+    case ISD::SRA:
+      Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,       // Sign extend high part.
+                       DAG.getConstant(NVTBits-1, ShTy));
+      Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+      return true;
+    }
+  }
+
+#if 0
+  // FIXME: This code is broken for shifts with a zero amount!
+  // If we know that all of the high bits of the shift amount are zero, then we
+  // can do this as a couple of simple shifts.
+  if ((KnownZero & HighBitMask) == HighBitMask) {
+    // Compute 32-amt.
+    SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
+                                 DAG.getConstant(NVTBits, ShTy),
+                                 Amt);
+    unsigned Op1, Op2;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unknown shift");
+    case ISD::SHL:  Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+    case ISD::SRL:
+    case ISD::SRA:  Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+    }
+
+    Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
+    Hi = DAG.getNode(ISD::OR, NVT,
+                     DAG.getNode(Op1, NVT, InH, Amt),
+                     DAG.getNode(Op2, NVT, InL, Amt2));
+    return true;
+  }
+#endif
+
+  return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  SDValue Amt = N->getOperand(1);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT ShTy = Amt.getValueType();
+  unsigned NVTBits = NVT.getSizeInBits();
+  assert(isPowerOf2_32(NVTBits) &&
+         "Expanded integer type size not a power of two!");
+  DebugLoc dl = N->getDebugLoc();
+
+  // Get the incoming operand to be shifted.
+  SDValue InL, InH;
+  GetExpandedInteger(N->getOperand(0), InL, InH);
+
+  SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
+  SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+  SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+  SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+                                 Amt, NVBitsNode, ISD::SETULT);
+
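+  // Compute both the "short" (Amt < NVTBits) and "long" (Amt >= NVTBits)
+  // results unconditionally, then select between them with isShort.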
+  SDValue LoS, HiS, LoL, HiL;
+  switch (N->getOpcode()) {
+  default: llvm_unreachable("Unknown shift");
+  case ISD::SHL:
+    // Short: ShAmt < NVTBits
+    LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+    HiS = DAG.getNode(ISD::OR, dl, NVT,
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+    // FIXME: If Amt is zero, the following shift generates an undefined result
+    // on some architectures.
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+    // Long: ShAmt >= NVTBits
+    LoL = DAG.getConstant(0, NVT);                        // Lo part is zero.
+    HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
+
+    Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+    Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+    return true;
+  case ISD::SRL:
+    // Short: ShAmt < NVTBits
+    HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+    LoS = DAG.getNode(ISD::OR, dl, NVT,
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+    // FIXME: If Amt is zero, the following shift generates an undefined result
+    // on some architectures.
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+    // Long: ShAmt >= NVTBits
+    HiL = DAG.getConstant(0, NVT);                        // Hi part is zero.
+    LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+    Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+    Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+    return true;
+  case ISD::SRA:
+    // Short: ShAmt < NVTBits
+    HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+    LoS = DAG.getNode(ISD::OR, dl, NVT,
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+    // FIXME: If Amt is zero, the following shift generates an undefined result
+    // on some architectures.
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+    // Long: ShAmt >= NVTBits
+    HiL = DAG.getNode(ISD::SRA, dl, NVT, InH,             // Sign of Hi part.
+                      DAG.getConstant(NVTBits-1, ShTy));
+    LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+    Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+    Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+    return true;
+  }
+
+  return false;
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // Expand the subcomponents.
+  SDValue LHSL, LHSH, RHSL, RHSH;
+  GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+  GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+  EVT NVT = LHSL.getValueType();
+  SDValue LoOps[2] = { LHSL, RHSL };
+  SDValue HiOps[3] = { LHSH, RHSH };
+
+  // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+  // them.  TODO: Teach operation legalization how to expand unsupported
+  // ADDC/ADDE/SUBC/SUBE.  The problem is that these operations generate
+  // a carry of type MVT::Flag, but there doesn't seem to be any way to
+  // generate a value of this type in the expanded code sequence.
+  bool hasCarry =
+    TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+                                   ISD::ADDC : ISD::SUBC,
+                                 TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+  if (hasCarry) {
+    SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+    if (N->getOpcode() == ISD::ADD) {
+      Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+    } else {
+      Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+    }
+  } else {
+    if (N->getOpcode() == ISD::ADD) {
+      Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+      Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
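+      // With no carry-setting add available, detect the carry manually: the
+      // low half wrapped around iff the truncated sum is unsigned-less-than
+      // an addend.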
+      SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+                                  ISD::SETULT);
+      SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+                                   DAG.getConstant(1, NVT),
+                                   DAG.getConstant(0, NVT));
+      SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+                                  ISD::SETULT);
+      SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+                                   DAG.getConstant(1, NVT), Carry1);
+      Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+    } else {
+      Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+      Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+      SDValue Cmp =
+        DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+                     LoOps[0], LoOps[1], ISD::SETULT);
+      SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+                                   DAG.getConstant(1, NVT),
+                                   DAG.getConstant(0, NVT));
+      Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+    }
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  // Expand the subcomponents.
+  SDValue LHSL, LHSH, RHSL, RHSH;
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+  GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+  SDValue LoOps[2] = { LHSL, RHSL };
+  SDValue HiOps[3] = { LHSH, RHSH };
+
+  if (N->getOpcode() == ISD::ADDC) {
+    Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+    HiOps[2] = Lo.getValue(1);
+    Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+  } else {
+    Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+    HiOps[2] = Lo.getValue(1);
+    Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+  }
+
+  // Legalized the flag result - switch anything that used the old flag to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  // Expand the subcomponents.
+  SDValue LHSL, LHSH, RHSL, RHSH;
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+  GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+  SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+  SDValue HiOps[3] = { LHSH, RHSH };
+
+  Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3);
+  HiOps[2] = Lo.getValue(1);
+  Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3);
+
+  // Legalized the flag result - switch anything that used the old flag to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+                                               SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Op = N->getOperand(0);
+  if (Op.getValueType().bitsLE(NVT)) {
+    // The low part is any extension of the input (which degenerates to a copy).
+    Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+    Hi = DAG.getUNDEF(NVT);   // The high part is undefined.
+  } else {
+    // For example, extension of an i48 to an i64.  The operand type necessarily
+    // promotes to the result type, so will end up being expanded too.
+    assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+           "Only know how to promote this result!");
+    SDValue Res = GetPromotedInteger(Op);
+    assert(Res.getValueType() == N->getValueType(0) &&
+           "Operand over promoted?");
+    // Split the promoted operand.  This will simplify when it is expanded.
+    SplitInteger(Res, Lo, Hi);
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+                                               SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+  EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  unsigned NVTBits = NVT.getSizeInBits();
+  unsigned EVTBits = ExtVT.getSizeInBits();
+
+  if (NVTBits < EVTBits) {
+    Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        EVTBits - NVTBits)));
+  } else {
+    Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(ExtVT));
+    // The high part replicates the sign bit of Lo, make it explicit.
+    Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+                     DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+                                               SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+  EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  unsigned NVTBits = NVT.getSizeInBits();
+  unsigned EVTBits = ExtVT.getSizeInBits();
+
+  if (NVTBits < EVTBits) {
+    Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        EVTBits - NVTBits)));
+  } else {
+    Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(ExtVT));
+    // The high part must be zero, make it explicit.
+    Hi = DAG.getConstant(0, NVT);
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Hi, Lo);  // Note swapped operands.
+  Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+  Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned NBitWidth = NVT.getSizeInBits();
+  const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
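+  // E.g. an i64 constant 0x0123456789ABCDEF expands, assuming i32 parts, to
+  // Lo = 0x89ABCDEF and Hi = 0x01234567.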
+  Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+  Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32)
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+
+  SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+                                   DAG.getConstant(0, NVT), ISD::SETNE);
+
+  SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
+  SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+
+  Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+                   DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+                               DAG.getConstant(NVT.getSizeInBits(), NVT)));
+  Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+  Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+                   DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+  Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+
+  SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+                                   DAG.getConstant(0, NVT), ISD::SETNE);
+
+  SDValue LoTZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
+  SDValue HiTZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+
+  Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoTZ,
+                   DAG.getNode(ISD::ADD, dl, NVT, HiTZ,
+                               DAG.getConstant(NVT.getSizeInBits(), NVT)));
+  Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+  SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+  SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  if (ISD::isNormalLoad(N)) {
+    ExpandRes_NormalLoad(N, Lo, Hi);
+    return;
+  }
+
+  assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Ch  = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  ISD::LoadExtType ExtType = N->getExtensionType();
+  int SVOffset = N->getSrcValueOffset();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  DebugLoc dl = N->getDebugLoc();
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  if (N->getMemoryVT().bitsLE(NVT)) {
+    EVT MemVT = N->getMemoryVT();
+
+    Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+                        MemVT, isVolatile, Alignment);
+
+    // Remember the chain.
+    Ch = Lo.getValue(1);
+
+    if (ExtType == ISD::SEXTLOAD) {
+      // The high part is obtained by SRA'ing all but one of the bits of the
+      // lo part.
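+      // (E.g. with NVT == i32: an arithmetic shift of Lo right by 31 yields
+      // 0xFFFFFFFF when Lo is negative and 0 otherwise, replicating the
+      // sign bit across the high half.)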
+      unsigned LoSize = Lo.getValueType().getSizeInBits();
+      Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+                       DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+    } else if (ExtType == ISD::ZEXTLOAD) {
+      // The high part is just a zero.
+      Hi = DAG.getConstant(0, NVT);
+    } else {
+      assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+      // The high part is undefined.
+      Hi = DAG.getUNDEF(NVT);
+    }
+  } else if (TLI.isLittleEndian()) {
+    // Little-endian - low bits are at low addresses.
+    Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+                     isVolatile, Alignment);
+
+    unsigned ExcessBits =
+      N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+    EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+    // Increment the pointer to the other half.
+    unsigned IncrementSize = NVT.getSizeInBits()/8;
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getIntPtrConstant(IncrementSize));
+    Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(),
+                        SVOffset+IncrementSize, NEVT,
+                        isVolatile, MinAlign(Alignment, IncrementSize));
+
+    // Build a factor node to remember that this load is independent of the
+    // other one.
+    Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                     Hi.getValue(1));
+  } else {
+    // Big-endian - high bits are at low addresses.  Favor aligned loads at
+    // the cost of some bit-fiddling.
+    EVT MemVT = N->getMemoryVT();
+    unsigned EBytes = MemVT.getStoreSize();
+    unsigned IncrementSize = NVT.getSizeInBits()/8;
+    unsigned ExcessBits = (EBytes - IncrementSize)*8;
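+    // E.g. for an extending i48 load with NVT == i32: EBytes == 6,
+    // IncrementSize == 4 and ExcessBits == 16, so the first load below
+    // grabs the 32 most significant bits and the second zext-loads the
+    // remaining 16 bits, which are shifted into place afterwards.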
+
+    // Load both the high bits and maybe some of the low bits.
+    Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+                        EVT::getIntegerVT(*DAG.getContext(),
+                                          MemVT.getSizeInBits() - ExcessBits),
+                        isVolatile, Alignment);
+
+    // Increment the pointer to the other half.
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getIntPtrConstant(IncrementSize));
+    // Load the rest of the low bits.
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(),
+                        SVOffset+IncrementSize,
+                        EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+                        isVolatile, MinAlign(Alignment, IncrementSize));
+
+    // Build a factor node to remember that this load is independent of the
+    // other one.
+    Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                     Hi.getValue(1));
+
+    if (ExcessBits < NVT.getSizeInBits()) {
+      // Transfer low bits from the bottom of Hi to the top of Lo.
+      Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+                       DAG.getNode(ISD::SHL, dl, NVT, Hi,
+                                   DAG.getConstant(ExcessBits,
+                                                   TLI.getPointerTy())));
+      // Move high bits to the right position in Hi.
+      Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
+                       NVT, Hi,
+                       DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+                                       TLI.getPointerTy()));
+    }
+  }
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  SDValue LL, LH, RL, RH;
+  GetExpandedInteger(N->getOperand(0), LL, LH);
+  GetExpandedInteger(N->getOperand(1), RL, RH);
+  Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+  Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+                                        SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
+  bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT);
+  bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT);
+  bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT);
+  if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+    SDValue LL, LH, RL, RH;
+    GetExpandedInteger(N->getOperand(0), LL, LH);
+    GetExpandedInteger(N->getOperand(1), RL, RH);
+    unsigned OuterBitSize = VT.getSizeInBits();
+    unsigned InnerBitSize = NVT.getSizeInBits();
+    unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+    unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+    APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+    if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+        DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+      // The inputs are both zero-extended.
+      if (HasUMUL_LOHI) {
+        // We can emit a umul_lohi.
+        Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+        Hi = SDValue(Lo.getNode(), 1);
+        return;
+      }
+      if (HasMULHU) {
+        // We can emit a mulhu+mul.
+        Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+        Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+        return;
+      }
+    }
+    if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+      // The input values are both sign-extended.
+      if (HasSMUL_LOHI) {
+        // We can emit a smul_lohi.
+        Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+        Hi = SDValue(Lo.getNode(), 1);
+        return;
+      }
+      if (HasMULHS) {
+        // We can emit a mulhs+mul.
+        Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+        Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL);
+        return;
+      }
+    }
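+    // Sketch of the general case handled below: writing the inputs as
+    // LHS = LH*2^n + LL and RHS = RH*2^n + RL with n = InnerBitSize, the
+    // low 2n bits of the product are LL*RL + (LL*RH + LH*RL)*2^n, so the
+    // low half is lo(LL*RL) and the high half is hi(LL*RL) + LL*RH + LH*RL
+    // (each product taken modulo 2^n).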
+    if (HasUMUL_LOHI) {
+      // Lo,Hi = umul LHS, RHS.
+      SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+                                     DAG.getVTList(NVT, NVT), LL, RL);
+      Lo = UMulLOHI;
+      Hi = UMulLOHI.getValue(1);
+      RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+      LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+      Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+      Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+      return;
+    }
+    if (HasMULHU) {
+      Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+      Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+      RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+      LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+      Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+      Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+      return;
+    }
+  }
+
+  // If nothing else, we can make a libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i16)
+    LC = RTLIB::MUL_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::MUL_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::MUL_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::MUL_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i16)
+    LC = RTLIB::SDIV_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::SDIV_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::SDIV_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::SDIV_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // If we can emit an efficient shift operation, do so now.  Check to see if
+  // the RHS is a constant.
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+  // If we can determine that the high bit of the shift is zero or one, even if
+  // the low bits are variable, emit this shift in an optimized form.
+  if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+    return;
+
+  // If this target supports shift_PARTS, use it.  First, map to the _PARTS opc.
+  unsigned PartsOpc;
+  if (N->getOpcode() == ISD::SHL) {
+    PartsOpc = ISD::SHL_PARTS;
+  } else if (N->getOpcode() == ISD::SRL) {
+    PartsOpc = ISD::SRL_PARTS;
+  } else {
+    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+    PartsOpc = ISD::SRA_PARTS;
+  }
+
+  // Next, check to see if the target supports the chosen _PARTS operation
+  // or if it will custom expand it.
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+  if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+      Action == TargetLowering::Custom) {
+    // Expand the subcomponents.
+    SDValue LHSL, LHSH;
+    GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+
+    SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
+    EVT VT = LHSL.getValueType();
+    Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
+    Hi = Lo.getValue(1);
+    return;
+  }
+
+  // Otherwise, emit a libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  bool isSigned;
+  if (N->getOpcode() == ISD::SHL) {
+    isSigned = false; /*sign irrelevant*/
+    if (VT == MVT::i16)
+      LC = RTLIB::SHL_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SHL_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SHL_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SHL_I128;
+  } else if (N->getOpcode() == ISD::SRL) {
+    isSigned = false;
+    if (VT == MVT::i16)
+      LC = RTLIB::SRL_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SRL_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SRL_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SRL_I128;
+  } else {
+    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+    isSigned = true;
+    if (VT == MVT::i16)
+      LC = RTLIB::SRA_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SRA_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SRA_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SRA_I128;
+  }
+
+  if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+    SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+    SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+    return;
+  }
+
+  if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+    llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+                                                SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Op = N->getOperand(0);
+  if (Op.getValueType().bitsLE(NVT)) {
+    // The low part is sign extension of the input (degenerates to a copy).
+    Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+    // The high part is obtained by SRA'ing all but one of the bits of the
+    // low part.
+    unsigned LoSize = NVT.getSizeInBits();
+    Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+                     DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+  } else {
+    // For example, extension of an i48 to an i64.  The operand type necessarily
+    // promotes to the result type, so will end up being expanded too.
+    assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+           "Only know how to promote this result!");
+    SDValue Res = GetPromotedInteger(Op);
+    assert(Res.getValueType() == N->getValueType(0) &&
+           "Operand over promoted?");
+    // Split the promoted operand.  This will simplify when it is expanded.
+    SplitInteger(Res, Lo, Hi);
+    unsigned ExcessBits =
+      Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+    Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        ExcessBits)));
+  }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+  if (ExtVT.bitsLE(Lo.getValueType())) {
+    // sext_inreg the low part if needed.
+    Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+                     N->getOperand(1));
+
+    // The high part gets the sign extension from the lo-part.  This handles
+    // things like sextinreg V:i64 from i8.
+    Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+                     DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+                                     TLI.getPointerTy()));
+  } else {
+    // For example, extension of an i48 to an i64.  Leave the low part alone,
+    // sext_inreg the high part.
+    unsigned ExcessBits =
+      ExtVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+    Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        ExcessBits)));
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i16)
+    LC = RTLIB::SREM_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::SREM_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::SREM_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::SREM_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
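+  // E.g. truncating an i128 to an i64 whose halves are i32: Lo is the low
+  // 32 bits of the source and Hi is bits [32,64), i.e. the source shifted
+  // right by 32 and truncated.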
+  Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+  Hi = DAG.getNode(ISD::SRL, dl,
+                   N->getOperand(0).getValueType(), N->getOperand(0),
+                   DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy()));
+  Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i16)
+    LC = RTLIB::UDIV_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::UDIV_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::UDIV_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::UDIV_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i16)
+    LC = RTLIB::UREM_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::UREM_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::UREM_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::UREM_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+                                                SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Op = N->getOperand(0);
+  if (Op.getValueType().bitsLE(NVT)) {
+    // The low part is zero extension of the input (degenerates to a copy).
+    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+    Hi = DAG.getConstant(0, NVT);   // The high part is just a zero.
+  } else {
+    // For example, extension of an i48 to an i64.  The operand type necessarily
+    // promotes to the result type, so will end up being expanded too.
+    assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+           "Only know how to promote this result!");
+    SDValue Res = GetPromotedInteger(Op);
+    assert(Res.getValueType() == N->getValueType(0) &&
+           "Operand over promoted?");
+    // Split the promoted operand.  This will simplify when it is expanded.
+    SplitInteger(Res, Lo, Hi);
+    unsigned ExcessBits =
+      Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+    Hi = DAG.getZeroExtendInReg(Hi, dl,
+                                EVT::getIntegerVT(*DAG.getContext(),
+                                                  ExcessBits));
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion.  At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
+  switch (N->getOpcode()) {
+  default:
+  #ifndef NDEBUG
+    dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+  #endif
+    llvm_unreachable("Do not know how to expand this operator's operand!");
+
+  case ISD::BIT_CONVERT:       Res = ExpandOp_BIT_CONVERT(N); break;
+  case ISD::BR_CC:             Res = ExpandIntOp_BR_CC(N); break;
+  case ISD::BUILD_VECTOR:      Res = ExpandOp_BUILD_VECTOR(N); break;
+  case ISD::EXTRACT_ELEMENT:   Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+  case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+  case ISD::SCALAR_TO_VECTOR:  Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+  case ISD::SELECT_CC:         Res = ExpandIntOp_SELECT_CC(N); break;
+  case ISD::SETCC:             Res = ExpandIntOp_SETCC(N); break;
+  case ISD::SINT_TO_FP:        Res = ExpandIntOp_SINT_TO_FP(N); break;
+  case ISD::STORE:   Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+  case ISD::TRUNCATE:          Res = ExpandIntOp_TRUNCATE(N); break;
+  case ISD::UINT_TO_FP:        Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:              Res = ExpandIntOp_Shift(N); break;
+  case ISD::RETURNADDR:
+  case ISD::FRAMEADDR:         Res = ExpandIntOp_RETURNADDR(N); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison.  This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+                                                  SDValue &NewRHS,
+                                                  ISD::CondCode &CCCode,
+                                                  DebugLoc dl) {
+  SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+  GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+  GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+  if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+    if (RHSLo == RHSHi) {
+      if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+        if (RHSCST->isAllOnesValue()) {
+          // Equality comparison to -1.
+          NewLHS = DAG.getNode(ISD::AND, dl,
+                               LHSLo.getValueType(), LHSLo, LHSHi);
+          NewRHS = RHSLo;
+          return;
+        }
+      }
+    }
+
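+    // Use the identity (a == b) iff ((aLo ^ bLo) | (aHi ^ bHi)) == 0 to
+    // collapse the wide equality into one comparison against zero.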
+    NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+    NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+    NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    return;
+  }
+
+  // If this is a comparison of the sign bit, just look at the top part.
+  // X > -1,  X < 0
+  if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+    if ((CCCode == ISD::SETLT && CST->isNullValue()) ||     // X < 0
+        (CCCode == ISD::SETGT && CST->isAllOnesValue())) {  // X > -1
+      NewLHS = LHSHi;
+      NewRHS = RHSHi;
+      return;
+    }
+
+  // FIXME: This generated code sucks.
+  ISD::CondCode LowCC;
+  switch (CCCode) {
+  default: llvm_unreachable("Unknown integer setcc!");
+  case ISD::SETLT:
+  case ISD::SETULT: LowCC = ISD::SETULT; break;
+  case ISD::SETGT:
+  case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+  case ISD::SETLE:
+  case ISD::SETULE: LowCC = ISD::SETULE; break;
+  case ISD::SETGE:
+  case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+  }
+
+  // Tmp1 = lo(op1) < lo(op2)   // Always unsigned comparison
+  // Tmp2 = hi(op1) < hi(op2)   // Signedness depends on operands
+  // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
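+  // (The low comparison is unsigned because once the high parts are equal,
+  // the low parts are pure magnitude bits: e.g. for i64, -2 < -1 reduces to
+  // the unsigned compare 0xFFFFFFFE <u 0xFFFFFFFF of the low words.)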
+
+  // NOTE: on targets without efficient SELECT of bools, we can always use
+  // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+  TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL);
+  SDValue Tmp1, Tmp2;
+  Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+                           LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+  if (!Tmp1.getNode())
+    Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+                        LHSLo, RHSLo, LowCC);
+  Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                           LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+  if (!Tmp2.getNode())
+    Tmp2 = DAG.getNode(ISD::SETCC, dl,
+                       TLI.getSetCCResultType(LHSHi.getValueType()),
+                       LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+  ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+  ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+  if ((Tmp1C && Tmp1C->isNullValue()) ||
+      (Tmp2C && Tmp2C->isNullValue() &&
+       (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+        CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+      (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+       (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+        CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+    // If the low part is known false, return the high part.
+    // For LE / GE, if the high part is known false, ignore the low part.
+    // For LT / GT, if the high part is known true, ignore the low part.
+    NewLHS = Tmp2;
+    NewRHS = SDValue();
+    return;
+  }
+
+  NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                             LHSHi, RHSHi, ISD::SETEQ, false,
+                             DagCombineInfo, dl);
+  if (!NewLHS.getNode())
+    NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                          LHSHi, RHSHi, ISD::SETEQ);
+  NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+                       NewLHS, Tmp1, Tmp2);
+  NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                N->getOperand(2), N->getOperand(3),
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If IntegerExpandSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+  // The value being shifted is legal, but the shift amount is too big.
+  // It follows that either the result of the shift is undefined, or the
+  // upper half of the shift amount is zero.  Just use the lower half.
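+  // (E.g. an i32 shifted by an i64 amount: any defined amount is < 32 and
+  // so fits entirely in the low word of the expanded amount.)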
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(1), Lo, Hi);
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant.
+  // This causes problems on 8/16-bit targets, so just truncate the constant
+  // to a type the target can handle.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  return DAG.UpdateNodeOperands(SDValue(N, 0), Lo);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT DstVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Don't know how to expand this SINT_TO_FP!");
+  return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+
+  EVT VT = N->getOperand(1).getValueType();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Ch  = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  int SVOffset = N->getSrcValueOffset();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Lo, Hi;
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  if (N->getMemoryVT().bitsLE(NVT)) {
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+                             N->getMemoryVT(), isVolatile, Alignment);
+  } else if (TLI.isLittleEndian()) {
+    // Little-endian - low bits are at low addresses.
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+
+    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+                      isVolatile, Alignment);
+
+    unsigned ExcessBits =
+      N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+    EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+    // Increment the pointer to the other half.
+    unsigned IncrementSize = NVT.getSizeInBits()/8;
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getIntPtrConstant(IncrementSize));
+    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+                           SVOffset+IncrementSize, NEVT,
+                           isVolatile, MinAlign(Alignment, IncrementSize));
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+  } else {
+    // Big-endian - high bits are at low addresses.  Favor aligned stores at
+    // the cost of some bit-fiddling.
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+
+    EVT ExtVT = N->getMemoryVT();
+    unsigned EBytes = ExtVT.getStoreSize();
+    unsigned IncrementSize = NVT.getSizeInBits()/8;
+    unsigned ExcessBits = (EBytes - IncrementSize)*8;
+    EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+                                 ExtVT.getSizeInBits() - ExcessBits);
+
+    if (ExcessBits < NVT.getSizeInBits()) {
+      // Transfer high bits from the top of Lo to the bottom of Hi.
+      Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+                       DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+                                       TLI.getPointerTy()));
+      Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+                       DAG.getNode(ISD::SRL, dl, NVT, Lo,
+                                   DAG.getConstant(ExcessBits,
+                                                   TLI.getPointerTy())));
+    }
+
+    // Store both the high bits and maybe some of the low bits.
+    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+                           SVOffset, HiVT, isVolatile, Alignment);
+
+    // Increment the pointer to the other half.
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getIntPtrConstant(IncrementSize));
+    // Store the lowest ExcessBits bits in the second half.
+    Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
+                           SVOffset+IncrementSize,
+                           EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+                           isVolatile, MinAlign(Alignment, IncrementSize));
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+  }
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+  SDValue InL, InH;
+  GetExpandedInteger(N->getOperand(0), InL, InH);
+  // Just truncate the low part of the source.
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT SrcVT = Op.getValueType();
+  EVT DstVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+    // Do a signed conversion then adjust the result.
+    SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+    SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+    // The result of the signed conversion needs adjusting if the 'sign bit' of
+    // the incoming integer was set.  To handle this, we dynamically test to see
+    // if it is set, and, if so, add a fudge factor.
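+    // E.g. for SrcVT == i64: the bit pattern 2^63 reads as -2^63 when
+    // interpreted as signed, so the signed conversion is off by exactly
+    // 2^64; adding the 2^64 fudge factor (f32 bit pattern 0x5F800000)
+    // restores the correct unsigned value.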
+
+    const uint64_t F32TwoE32  = 0x4F800000ULL;
+    const uint64_t F32TwoE64  = 0x5F800000ULL;
+    const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+    APInt FF(32, 0);
+    if (SrcVT == MVT::i32)
+      FF = APInt(32, F32TwoE32);
+    else if (SrcVT == MVT::i64)
+      FF = APInt(32, F32TwoE64);
+    else if (SrcVT == MVT::i128)
+      FF = APInt(32, F32TwoE128);
+    else
+      assert(false && "Unsupported UINT_TO_FP!");
+
+    // Check whether the sign bit is set.
+    SDValue Lo, Hi;
+    GetExpandedInteger(Op, Lo, Hi);
+    SDValue SignSet = DAG.getSetCC(dl,
+                                   TLI.getSetCCResultType(Hi.getValueType()),
+                                   Hi, DAG.getConstant(0, Hi.getValueType()),
+                                   ISD::SETLT);
+
+    // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+    SDValue FudgePtr =
+      DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+                          TLI.getPointerTy());
+
+    // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+    SDValue Zero = DAG.getIntPtrConstant(0);
+    SDValue Four = DAG.getIntPtrConstant(4);
+    if (TLI.isBigEndian()) std::swap(Zero, Four);
+    SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+                                 Zero, Four);
+    unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+    FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+    Alignment = std::min(Alignment, 4u);
+
+    // Load the value out, extending it from f32 to the destination float type.
+    // FIXME: Avoid the extend by constructing the right constant pool?
+    SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+                                   FudgePtr, NULL, 0, MVT::f32,
+                                   false, Alignment);
+    return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+  }
+
+  // Otherwise, use a libcall.
+  RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Don't know how to expand this UINT_TO_FP!");
+  return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 0000000..37f36a3
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1102 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method.  It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types.  This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+  // If a node is not processed, then none of its values should be mapped by any
+  // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+  // If a node is processed, then each value with an illegal type must be mapped
+  // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+  // Values with a legal type may be mapped by ReplacedValues, but not by any of
+  // the other maps.
+
+  // Note that these invariants may not hold momentarily when processing a node:
+  // the node being processed may be put in a map before being marked Processed.
+
+  // Note that it is possible to have nodes marked NewNode in the DAG.  This can
+  // occur in two ways.  Firstly, a node may be created during legalization but
+  // never passed to the legalization core.  This is usually due to the implicit
+  // folding that occurs when using the DAG.getNode operators.  Secondly, a new
+  // node may be passed to the legalization core, but when analyzed may morph
+  // into a different node, leaving the original node as a NewNode in the DAG.
+  // A node may morph if one of its operands changes during analysis.  Whether
+  // it actually morphs or not depends on whether, after updating its operands,
+  // it is equivalent to an existing node: if so, it morphs into that existing
+  // node (CSE).  An operand can change during analysis if the operand is a new
+  // node that morphs, or it is a processed value that was mapped to some other
+  // value (as recorded in ReplacedValues) in which case the operand is turned
+  // into that other value.  If a node morphs then the node it morphed into will
+  // be used instead of it for legalization, however the original node continues
+  // to live on in the DAG.
+  // The conclusion is that though there may be nodes marked NewNode in the DAG,
+  // all uses of such nodes are also marked NewNode: the result is a fungus of
+  // NewNodes growing on top of the useful nodes, and perhaps using them, but
+  // not used by them.
+
+  // If a value is mapped by ReplacedValues, then it must have no uses, except
+  // by nodes marked NewNode (see above).
+
+  // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+  // Note that ReplacedValues should be applied iteratively.
+
+  // Note that the ReplacedValues map may also map deleted nodes (by iterating
+  // over the DAG we never dereference deleted nodes).  This means that it may
+  // also map nodes marked NewNode if the deallocated memory was reallocated as
+  // another node, and that new node was not seen by the LegalizeTypes machinery
+  // (for example because it was created but not used).  In general, we cannot
+  // distinguish between new nodes and deleted nodes.
+  SmallVector<SDNode*, 16> NewNodes;
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    // Remember nodes marked NewNode - they are subject to extra checking below.
+    if (I->getNodeId() == NewNode)
+      NewNodes.push_back(I);
+
+    for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+      SDValue Res(I, i);
+      bool Failed = false;
+
+      unsigned Mapped = 0;
+      if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+        Mapped |= 1;
+        // Check that remapped values are only used by nodes marked NewNode.
+        for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+             UI != UE; ++UI)
+          if (UI.getUse().getResNo() == i)
+            assert(UI->getNodeId() == NewNode &&
+                   "Remapped value has non-trivial use!");
+
+        // Check that the final result of applying ReplacedValues is not
+        // marked NewNode.
+        SDValue NewVal = ReplacedValues[Res];
+        DenseMap<SDValue, SDValue>::iterator It = ReplacedValues.find(NewVal);
+        while (It != ReplacedValues.end()) {
+          NewVal = It->second;
+          It = ReplacedValues.find(NewVal);
+        }
+        assert(NewVal.getNode()->getNodeId() != NewNode &&
+               "ReplacedValues maps to a new node!");
+      }
+      if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+        Mapped |= 2;
+      if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+        Mapped |= 4;
+      if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+        Mapped |= 8;
+      if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+        Mapped |= 16;
+      if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+        Mapped |= 32;
+      if (SplitVectors.find(Res) != SplitVectors.end())
+        Mapped |= 64;
+      if (WidenedVectors.find(Res) != WidenedVectors.end())
+        Mapped |= 128;
+
+      if (I->getNodeId() != Processed) {
+        // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+        // marked NewNode too, since a deleted node may have been reallocated as
+        // another node that has not been seen by the LegalizeTypes machinery.
+        if ((I->getNodeId() == NewNode && Mapped > 1) ||
+            (I->getNodeId() != NewNode && Mapped != 0)) {
+          dbgs() << "Unprocessed value in a map!";
+          Failed = true;
+        }
+      } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+        if (Mapped > 1) {
+          dbgs() << "Value with legal type was transformed!";
+          Failed = true;
+        }
+      } else {
+        if (Mapped == 0) {
+          dbgs() << "Processed value not in any map!";
+          Failed = true;
+        } else if (Mapped & (Mapped - 1)) {
+          dbgs() << "Value in multiple maps!";
+          Failed = true;
+        }
+      }
+
+      if (Failed) {
+        if (Mapped & 1)
+          dbgs() << " ReplacedValues";
+        if (Mapped & 2)
+          dbgs() << " PromotedIntegers";
+        if (Mapped & 4)
+          dbgs() << " SoftenedFloats";
+        if (Mapped & 8)
+          dbgs() << " ScalarizedVectors";
+        if (Mapped & 16)
+          dbgs() << " ExpandedIntegers";
+        if (Mapped & 32)
+          dbgs() << " ExpandedFloats";
+        if (Mapped & 64)
+          dbgs() << " SplitVectors";
+        if (Mapped & 128)
+          dbgs() << " WidenedVectors";
+        dbgs() << "\n";
+        llvm_unreachable(0);
+      }
+    }
+  }
+
+  // Check that NewNodes are only used by other NewNodes.
+  for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+    SDNode *N = NewNodes[i];
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI)
+      assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+  }
+}
+
+/// run - This is the main entry point for the type legalizer.  This does a
+/// top-down traversal of the dag, legalizing types as it goes.  Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+  bool Changed = false;
+
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+  Dummy.setNodeId(Unanalyzed);
+
+  // The root of the dag may dangle to deleted nodes until the type legalizer is
+  // done.  Set it to null to avoid confusion.
+  DAG.setRoot(SDValue());
+
+  // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+  // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+  // non-leaves.
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    if (I->getNumOperands() == 0) {
+      I->setNodeId(ReadyToProcess);
+      Worklist.push_back(I);
+    } else {
+      I->setNodeId(Unanalyzed);
+    }
+  }
+
+  // Now that we have a set of nodes to process, handle them all.
+  while (!Worklist.empty()) {
+#ifndef XDEBUG
+    if (EnableExpensiveChecks)
+#endif
+      PerformExpensiveChecks();
+
+    SDNode *N = Worklist.back();
+    Worklist.pop_back();
+    assert(N->getNodeId() == ReadyToProcess &&
+           "Node should be ready if on worklist!");
+
+    if (IgnoreNodeResults(N))
+      goto ScanOperands;
+
+    // Scan the values produced by the node, checking to see if any result
+    // types are illegal.
+    for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+      EVT ResultVT = N->getValueType(i);
+      switch (getTypeAction(ResultVT)) {
+      default:
+        assert(false && "Unknown action!");
+      case Legal:
+        break;
+      // The following calls must take care of *all* of the node's results,
+      // not just the illegal result they were passed (this includes results
+      // with a legal type).  Results can be remapped using ReplaceValueWith,
+      // or their promoted/expanded/etc values registered in PromotedIntegers,
+      // ExpandedIntegers etc.
+      case PromoteInteger:
+        PromoteIntegerResult(N, i);
+        Changed = true;
+        goto NodeDone;
+      case ExpandInteger:
+        ExpandIntegerResult(N, i);
+        Changed = true;
+        goto NodeDone;
+      case SoftenFloat:
+        SoftenFloatResult(N, i);
+        Changed = true;
+        goto NodeDone;
+      case ExpandFloat:
+        ExpandFloatResult(N, i);
+        Changed = true;
+        goto NodeDone;
+      case ScalarizeVector:
+        ScalarizeVectorResult(N, i);
+        Changed = true;
+        goto NodeDone;
+      case SplitVector:
+        SplitVectorResult(N, i);
+        Changed = true;
+        goto NodeDone;
+      case WidenVector:
+        WidenVectorResult(N, i);
+        Changed = true;
+        goto NodeDone;
+      }
+    }
+
+ScanOperands:
+    // Scan the operand list for the node, handling any nodes with operands that
+    // are illegal.
+    {
+    unsigned NumOperands = N->getNumOperands();
+    bool NeedsReanalyzing = false;
+    unsigned i;
+    for (i = 0; i != NumOperands; ++i) {
+      if (IgnoreNodeResults(N->getOperand(i).getNode()))
+        continue;
+
+      EVT OpVT = N->getOperand(i).getValueType();
+      switch (getTypeAction(OpVT)) {
+      default:
+        assert(false && "Unknown action!");
+      case Legal:
+        continue;
+      // The following calls must either replace all of the node's results
+      // using ReplaceValueWith, and return "false"; or update the node's
+      // operands in place, and return "true".
+      case PromoteInteger:
+        NeedsReanalyzing = PromoteIntegerOperand(N, i);
+        Changed = true;
+        break;
+      case ExpandInteger:
+        NeedsReanalyzing = ExpandIntegerOperand(N, i);
+        Changed = true;
+        break;
+      case SoftenFloat:
+        NeedsReanalyzing = SoftenFloatOperand(N, i);
+        Changed = true;
+        break;
+      case ExpandFloat:
+        NeedsReanalyzing = ExpandFloatOperand(N, i);
+        Changed = true;
+        break;
+      case ScalarizeVector:
+        NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+        Changed = true;
+        break;
+      case SplitVector:
+        NeedsReanalyzing = SplitVectorOperand(N, i);
+        Changed = true;
+        break;
+      case WidenVector:
+        NeedsReanalyzing = WidenVectorOperand(N, i);
+        Changed = true;
+        break;
+      }
+      break;
+    }
+
+    // The sub-method updated N in place.  Check to see if any operands are new,
+    // and if so, mark them.  If the node needs revisiting, don't add all users
+    // to the worklist etc.
+    if (NeedsReanalyzing) {
+      assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+      N->setNodeId(NewNode);
+      // Recompute the NodeId and correct processed operands, adding the node to
+      // the worklist if ready.
+      SDNode *M = AnalyzeNewNode(N);
+      if (M == N)
+        // The node didn't morph - nothing special to do, it will be revisited.
+        continue;
+
+      // The node morphed - this is equivalent to legalizing by replacing every
+      // value of N with the corresponding value of M.  So do that now.
+      assert(N->getNumValues() == M->getNumValues() &&
+             "Node morphing changed the number of results!");
+      for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+        // Replacing the value takes care of remapping the new value.
+        ReplaceValueWith(SDValue(N, i), SDValue(M, i));
+      assert(N->getNodeId() == NewNode && "Unexpected node state!");
+      // The node continues to live on as part of the NewNode fungus that
+      // grows on top of the useful nodes.  Nothing more needs to be done
+      // with it - move on to the next node.
+      continue;
+    }
+
+    if (i == NumOperands) {
+      DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
+    }
+    }
+NodeDone:
+
+    // If we reach here, the node was processed, potentially creating new nodes.
+    // Mark it as processed and add its users to the worklist as appropriate.
+    assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+    N->setNodeId(Processed);
+
+    for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+         UI != E; ++UI) {
+      SDNode *User = *UI;
+      int NodeId = User->getNodeId();
+
+      // This node has two options: it can either be a new node or its Node ID
+      // may be a count of the number of operands it has that are not ready.
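+      // (E.g. a user with three operands is assigned NodeId 2 when its
+      // first operand is processed, then 1, then 0 == ReadyToProcess, at
+      // which point it is queued on the worklist.)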
+      if (NodeId > 0) {
+        User->setNodeId(NodeId-1);
+
+        // If this was the last use it was waiting on, add it to the ready list.
+        if (NodeId-1 == ReadyToProcess)
+          Worklist.push_back(User);
+        continue;
+      }
+
+      // If this is an unreachable new node, then ignore it.  If it ever becomes
+      // reachable by being used by a newly created node then it will be handled
+      // by AnalyzeNewNode.
+      if (NodeId == NewNode)
+        continue;
+
+      // Otherwise, this node is new: this is the first operand of it that
+      // became ready.  Its new NodeId is the number of operands it has minus 1
+      // (as this node is now processed).
+      assert(NodeId == Unanalyzed && "Unknown node ID!");
+      User->setNodeId(User->getNumOperands() - 1);
+
+      // If the node only has a single operand, it is now ready.
+      if (User->getNumOperands() == 1)
+        Worklist.push_back(User);
+    }
+  }
+
+#ifndef XDEBUG
+  if (EnableExpensiveChecks)
+#endif
+    PerformExpensiveChecks();
+
+  // If the root changed (e.g. it was a dead load) update the root.
+  DAG.setRoot(Dummy.getValue());
+
+  // Remove dead nodes.  This is important for cleanliness, and must be done
+  // before the checking loop below: implicit folding by the DAG.getNode
+  // operators and node morphing can leave unreachable nodes around with
+  // their node IDs still set to NewNode.
+  DAG.RemoveDeadNodes();
+
+  // In a debug build, scan all the nodes to make sure we found them all.  This
+  // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    bool Failed = false;
+
+    // Check that all result types are legal.
+    if (!IgnoreNodeResults(I))
+      for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
+        if (!isTypeLegal(I->getValueType(i))) {
+          dbgs() << "Result type " << i << " illegal!\n";
+          Failed = true;
+        }
+
+    // Check that all operand types are legal.
+    for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
+      if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
+          !isTypeLegal(I->getOperand(i).getValueType())) {
+        dbgs() << "Operand type " << i << " illegal!\n";
+        Failed = true;
+      }
+
+    if (I->getNodeId() != Processed) {
+      if (I->getNodeId() == NewNode)
+        dbgs() << "New node not analyzed?\n";
+      else if (I->getNodeId() == Unanalyzed)
+        dbgs() << "Unanalyzed node not noticed?\n";
+      else if (I->getNodeId() > 0)
+        dbgs() << "Operand not processed?\n";
+      else if (I->getNodeId() == ReadyToProcess)
+        dbgs() << "Not added to worklist?\n";
+      Failed = true;
+    }
+
+    if (Failed) {
+      I->dump(&DAG); dbgs() << "\n";
+      llvm_unreachable(0);
+    }
+  }
+#endif
+
+  return Changed;
+}
+
+/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
+/// new nodes.  Correct any processed operands (this may change the node) and
+/// calculate the NodeId.  If the node itself changes to a processed node, it
+/// is not remapped - the caller needs to take care of this.
+/// Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+  // If this was an existing node that is already done, we're done.
+  if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+    return N;
+
+  // Remove any stale map entries.
+  ExpungeNode(N);
+
+  // Okay, we know that this node is new.  Recursively walk all of its operands
+  // to see if they are new also.  The depth of this walk is bounded by the size
+  // of the new tree that was constructed (usually 2-3 nodes), so we don't
+  // worry about revisiting nodes.
+  //
+  // As we walk the operands, keep track of the number of nodes that are
+  // processed.  If non-zero, this will become the new nodeid of this node.
+  // Operands may morph when they are analyzed.  If so, the node will be
+  // updated after all operands have been analyzed.  Since this is rare,
+  // the code tries to minimize overhead in the non-morphing case.
+
+  SmallVector<SDValue, 8> NewOps;
+  unsigned NumProcessed = 0;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue OrigOp = N->getOperand(i);
+    SDValue Op = OrigOp;
+
+    AnalyzeNewValue(Op); // Op may morph.
+
+    if (Op.getNode()->getNodeId() == Processed)
+      ++NumProcessed;
+
+    if (!NewOps.empty()) {
+      // Some previous operand changed.  Add this one to the list.
+      NewOps.push_back(Op);
+    } else if (Op != OrigOp) {
+      // This is the first operand to change - add all operands so far.
+      NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
+      NewOps.push_back(Op);
+    }
+  }
+
+  // Some operands changed - update the node.
+  if (!NewOps.empty()) {
+    SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0],
+                                       NewOps.size()).getNode();
+    if (M != N) {
+      // The node morphed into a different node.  Normally for this to happen
+      // the original node would have to be marked NewNode.  However this can
+      // in theory momentarily not be the case while ReplaceValueWith is doing
+      // its stuff.  Mark the original node NewNode to help sanity checking.
+      N->setNodeId(NewNode);
+      if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+        // It morphed into a previously analyzed node - nothing more to do.
+        return M;
+
+      // It morphed into a different new node.  Do the equivalent of passing
+      // it to AnalyzeNewNode: expunge it and calculate the NodeId.  No need
+      // to remap the operands, since they are the same as the operands we
+      // remapped above.
+      N = M;
+      ExpungeNode(N);
+    }
+  }
+
+  // Calculate the NodeId.
+  N->setNodeId(N->getNumOperands() - NumProcessed);
+  if (N->getNodeId() == ReadyToProcess)
+    Worklist.push_back(N);
+
+  return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+  Val.setNode(AnalyzeNewNode(Val.getNode()));
+  if (Val.getNode()->getNodeId() == Processed)
+    // We were passed a processed node, or it morphed into one - remap it.
+    RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always perform correct mappings.  In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
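+/// For example (illustrative): if node A was deleted with A -> B recorded in
+/// ReplacedValues, and A's memory is later reused for a brand new node, the
+/// stale A -> B entry would wrongly remap the new node unless expunged first.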
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+  if (N->getNodeId() != NewNode)
+    return;
+
+  // If N is not remapped by ReplacedValues then there is nothing to do.
+  unsigned i, e;
+  for (i = 0, e = N->getNumValues(); i != e; ++i)
+    if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+      break;
+
+  if (i == e)
+    return;
+
+  // Remove N from all maps - this is expensive but rare.
+
+  for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+       E = PromotedIntegers.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+       E = SoftenedFloats.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+       E = ScalarizedVectors.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+       E = WidenedVectors.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+       I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+    assert(I->first.getNode() != N);
+    RemapValue(I->second.first);
+    RemapValue(I->second.second);
+  }
+
+  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+       I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second.first);
+    RemapValue(I->second.second);
+  }
+
+  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+       I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second.first);
+    RemapValue(I->second.second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+       E = ReplacedValues.end(); I != E; ++I)
+    RemapValue(I->second);
+
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+    ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+  DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+  if (I != ReplacedValues.end()) {
+    // Use path compression to speed up future lookups if values get multiply
+    // replaced with other values.
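+    // For example (illustrative): if A was replaced by B and B in turn by C,
+    // looking up A rewrites the entry A -> B to A -> C, so later lookups of
+    // A resolve in a single step.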
+    RemapValue(I->second);
+    N = I->second;
+    assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+  }
+}
+
+namespace {
+  /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+  /// updates to nodes and recomputes their ready state.
+  class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+    DAGTypeLegalizer &DTL;
+    SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+  public:
+    explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+                                SmallSetVector<SDNode*, 16> &nta)
+      : DTL(dtl), NodesToAnalyze(nta) {}
+
+    virtual void NodeDeleted(SDNode *N, SDNode *E) {
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW deletion!");
+      // It is possible, though rare, for the deleted node N to occur as a
+      // target in a map, so note the replacement N -> E in ReplacedValues.
+      assert(E && "Node not replaced?");
+      DTL.NoteDeletion(N, E);
+
+      // In theory the deleted node could also have been scheduled for analysis.
+      // So remove it from the set of nodes which will be analyzed.
+      NodesToAnalyze.remove(N);
+
+      // In general nothing needs to be done for E, since it didn't change but
+      // only gained new uses.  However N -> E was just added to ReplacedValues,
+      // and the result of a ReplacedValues mapping is not allowed to be marked
+      // NewNode.  So if E is marked NewNode, then it needs to be analyzed.
+      if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+        NodesToAnalyze.insert(E);
+    }
+
+    virtual void NodeUpdated(SDNode *N) {
+      // Node updates can mean pretty much anything.  It is possible that an
+      // operand was set to something already processed, for example, in which
+      // case this node could become ready.  Recompute its flags.
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW update!");
+      N->setNodeId(DAGTypeLegalizer::NewNode);
+      NodesToAnalyze.insert(N);
+    }
+  };
+}
+
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value.  Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+  assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+  // If expansion produced new nodes, make sure they are properly marked.
+  ExpungeNode(From.getNode());
+  AnalyzeNewValue(To); // Expunges To.
+
+  // Anything that used the old node should now use the new one.  Note that this
+  // can potentially cause recursive merging.
+  SmallSetVector<SDNode*, 16> NodesToAnalyze;
+  NodeUpdateListener NUL(*this, NodesToAnalyze);
+  DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+  // The old node may still be present in a map like ExpandedIntegers or
+  // PromotedIntegers.  Record the replacement so lookups in those maps can be
+  // remapped to the new value.
+  ReplacedValues[From] = To;
+
+  // Process the list of nodes that need to be reanalyzed.
+  while (!NodesToAnalyze.empty()) {
+    SDNode *N = NodesToAnalyze.back();
+    NodesToAnalyze.pop_back();
+    if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+      // The node was analyzed while reanalyzing an earlier node - it is safe to
+      // skip.  Note that this is not a morphing node - otherwise it would still
+      // be marked NewNode.
+      continue;
+
+    // Analyze the node's operands and recalculate the node ID.
+    SDNode *M = AnalyzeNewNode(N);
+    if (M != N) {
+      // The node morphed into a different node.  Make everyone use the new node
+      // instead.
+      assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+      assert(N->getNumValues() == M->getNumValues() &&
+             "Node morphing changed the number of results!");
+      for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+        SDValue OldVal(N, i);
+        SDValue NewVal(M, i);
+        if (M->getNodeId() == Processed)
+          RemapValue(NewVal);
+        DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+      }
+      // The original node continues to exist in the DAG, marked NewNode.
+    }
+  }
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         "Invalid type for promoted integer");
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = PromotedIntegers[Op];
+  assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+  OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         "Invalid type for softened float");
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = SoftenedFloats[Op];
+  assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+  OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+         "Invalid type for scalarized vector");
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = ScalarizedVectors[Op];
+  assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+  OpEntry = Result;
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+                                          SDValue &Hi) {
+  std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+  RemapValue(Entry.first);
+  RemapValue(Entry.second);
+  assert(Entry.first.getNode() && "Operand isn't expanded");
+  Lo = Entry.first;
+  Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+                                          SDValue Hi) {
+  assert(Lo.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         Hi.getValueType() == Lo.getValueType() &&
+         "Invalid type for expanded integer");
+  // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Remember that this is the result of the node.
+  std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+  assert(Entry.first.getNode() == 0 && "Node already expanded");
+  Entry.first = Lo;
+  Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+                                        SDValue &Hi) {
+  std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+  RemapValue(Entry.first);
+  RemapValue(Entry.second);
+  assert(Entry.first.getNode() && "Operand isn't expanded");
+  Lo = Entry.first;
+  Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+                                        SDValue Hi) {
+  assert(Lo.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         Hi.getValueType() == Lo.getValueType() &&
+         "Invalid type for expanded float");
+  // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Remember that this is the result of the node.
+  std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+  assert(Entry.first.getNode() == 0 && "Node already expanded");
+  Entry.first = Lo;
+  Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+                                      SDValue &Hi) {
+  std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+  RemapValue(Entry.first);
+  RemapValue(Entry.second);
+  assert(Entry.first.getNode() && "Operand isn't split");
+  Lo = Entry.first;
+  Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+                                      SDValue Hi) {
+  assert(Lo.getValueType().getVectorElementType() ==
+         Op.getValueType().getVectorElementType() &&
+         2*Lo.getValueType().getVectorNumElements() ==
+         Op.getValueType().getVectorNumElements() &&
+         Hi.getValueType() == Lo.getValueType() &&
+         "Invalid type for split vector");
+  // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Remember that this is the result of the node.
+  std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+  assert(Entry.first.getNode() == 0 && "Node already split");
+  Entry.first = Lo;
+  Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         "Invalid type for widened vector");
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = WidenedVectors[Op];
+  assert(OpEntry.getNode() == 0 && "Node already widened!");
+  OpEntry = Result;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
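+/// For example (illustrative): an f64 operand becomes an i64 carrying exactly
+/// the same bits.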
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+  unsigned BitWidth = Op.getValueType().getSizeInBits();
+  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+                     EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Only applies to vectors!");
+  unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+  EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+  unsigned NumElts = Op.getValueType().getVectorNumElements();
+  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+                     EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+}
+
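+/// CreateStackStoreLoad - Emit a store of Op to a fresh stack slot, then load
+/// the slot back as type DestVT, reinterpreting the bits through memory.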
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+                                               EVT DestVT) {
+  DebugLoc dl = Op.getDebugLoc();
+  // Create the stack frame object.  Make sure it is aligned for both
+  // the source and destination types.
+  SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0);
+  // Result is a load from the stack slot.
+  return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// If LegalizeResult is true, the node has illegal result types and VT is the
+/// type of the illegal result; otherwise the node has legal result types but
+/// an illegal operand, and VT is the type of the illegal operand.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  if (LegalizeResult)
+    TLI.ReplaceNodeResults(N, Results, DAG);
+  else
+    TLI.LowerOperationWrapper(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom lower it after all.
+    return false;
+
+  // Make everything that once used N's values now use those in Results instead.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i)
+    ReplaceValueWith(SDValue(N, i), Results[i]);
+  return true;
+}
+
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  TLI.ReplaceNodeResults(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom widen the result after all.
+    return false;
+
+  // Update the widening map.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i)
+    SetWidenedVector(SDValue(N, i), Results[i]);
+  return true;
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
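+/// For example (illustrative): v8i32 splits into two v4i32, while a scalar
+/// such as i64 yields LoVT = HiVT = getTypeToTransformTo(i64), e.g. i32 on
+/// a 32-bit target.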
+void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
+  // Currently all types are split in half.
+  if (!InVT.isVector()) {
+    LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+  } else {
+    unsigned NumElements = InVT.getVectorNumElements();
+    assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+    LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+                                   InVT.getVectorElementType(), NumElements/2);
+  }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+                                       SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = Pair.getDebugLoc();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+                   DAG.getIntPtrConstant(0));
+  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+                   DAG.getIntPtrConstant(1));
+}
+
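+/// GetVectorElementPointer - Compute the address of the element at Index in a
+/// vector of EltVT elements starting at VecPtr: roughly
+/// VecPtr + Index * (size of EltVT in bytes).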
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
+                                                  SDValue Index) {
+  DebugLoc dl = Index.getDebugLoc();
+  // Make sure the index type is big enough to compute in.
+  if (Index.getValueType().bitsGT(TLI.getPointerTy()))
+    Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
+  else
+    Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+
+  // Calculate the element offset and add it to the pointer.
+  unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+
+  Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+                      DAG.getConstant(EltSize, Index.getValueType()));
+  return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
+
+/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
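+/// For example (illustrative): joining i32 values Lo = 1 and Hi = 2 produces
+/// the i64 value 0x0000000200000001, i.e. (zext(Hi) << 32) | zext(Lo).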
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+  // Arbitrarily use dlHi for result DebugLoc
+  DebugLoc dlHi = Hi.getDebugLoc();
+  DebugLoc dlLo = Lo.getDebugLoc();
+  EVT LVT = Lo.getValueType();
+  EVT HVT = Hi.getValueType();
+  EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+                              LVT.getSizeInBits() + HVT.getSizeInBits());
+
+  Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+  Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+  Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+                   DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+  return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// LibCallify - Convert the node into a libcall with the same prototype.
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+                                     bool isSigned) {
+  unsigned NumOps = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+  if (NumOps == 0) {
+    return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl);
+  } else if (NumOps == 1) {
+    SDValue Op = N->getOperand(0);
+    return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl);
+  } else if (NumOps == 2) {
+    SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+    return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl);
+  }
+  SmallVector<SDValue, 8> Ops(NumOps);
+  for (unsigned i = 0; i < NumOps; ++i)
+    Ops[i] = N->getOperand(i);
+
+  return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
+}
+
+/// MakeLibCall - Generate a libcall taking the given operands as arguments and
+/// returning a result of type RetVT.
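+/// For example (illustrative): a 64-bit float addition might be lowered via
+/// MakeLibCall(RTLIB::ADD_F64, MVT::f64, Ops, 2, false, dl), which typically
+/// emits a call to "__adddf3".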
+SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+                                      const SDValue *Ops, unsigned NumOps,
+                                      bool isSigned, DebugLoc dl) {
+  TargetLowering::ArgListTy Args;
+  Args.reserve(NumOps);
+
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    Entry.Node = Ops[i];
+    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+  std::pair<SDValue,SDValue> CallInfo =
+    TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+                    false, 0, TLI.getLibcallCallingConv(LC), false,
+                    /*isReturnValueUsed=*/true,
+                    Callee, Args, DAG, dl,
+                    DAG.GetOrdering(DAG.getEntryNode().getNode()));
+  return CallInfo.first;
+}
+
+/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
+/// of the given type.  A target boolean is an integer value, not necessarily of
+/// type i1, the bits of which conform to getBooleanContents.
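+/// For example (illustrative): promoting an i1 "true" to i32 yields 1 under
+/// ZeroOrOneBooleanContent, -1 under ZeroOrNegativeOneBooleanContent, and a
+/// value whose upper 31 bits are rubbish under UndefinedBooleanContent.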
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+  DebugLoc dl = Bool.getDebugLoc();
+  ISD::NodeType ExtendCode;
+  switch (TLI.getBooleanContents()) {
+  default:
+    assert(false && "Unknown BooleanContent!");
+  case TargetLowering::UndefinedBooleanContent:
+    // Extend to VT by adding rubbish bits.
+    ExtendCode = ISD::ANY_EXTEND;
+    break;
+  case TargetLowering::ZeroOrOneBooleanContent:
+    // Extend to VT by adding zero bits.
+    ExtendCode = ISD::ZERO_EXTEND;
+    break;
+  case TargetLowering::ZeroOrNegativeOneBooleanContent: {
+    // Extend to VT by copying the sign bit.
+    ExtendCode = ISD::SIGN_EXTEND;
+    break;
+  }
+  }
+  return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
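+/// For example (illustrative): splitting an i64 Op with LoVT = HiVT = i32
+/// produces Lo = trunc(Op) and Hi = trunc(Op >> 32).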
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+                                    EVT LoVT, EVT HiVT,
+                                    SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = Op.getDebugLoc();
+  assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+         Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+  Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+  Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+                   DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+  Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// SplitInteger - Return the lower and upper halves of Op's bits in a value
+/// type half the size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+                                    SDValue &Lo, SDValue &Hi) {
+  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+                                 Op.getValueType().getSizeInBits()/2);
+  SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Entry Point
+//===----------------------------------------------------------------------===//
+
+/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+/// only uses types natively supported by the target.  Returns "true" if it made
+/// any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+  return DAGTypeLegalizer(*this).run();
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 0000000..b0af357
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,736 @@
+//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class.  This is a private interface
+// shared among the files that implement the SelectionDAG::LegalizeTypes
+// method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left.  This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class VISIBILITY_HIDDEN DAGTypeLegalizer {
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+public:
+  // NodeIdFlags - This pass uses the NodeId on the SDNodes to track the state
+  // of each node.  This enum defines the special state values.
+  enum NodeIdFlags {
+    /// ReadyToProcess - All operands have been processed, so this node is ready
+    /// to be handled.
+    ReadyToProcess = 0,
+
+    /// NewNode - This is a new node, not before seen, that was created in the
+    /// process of legalizing some other node.
+    NewNode = -1,
+
+    /// Unanalyzed - This node's ID needs to be set to the number of its
+    /// unprocessed operands.
+    Unanalyzed = -2,
+
+    /// Processed - This is a node that has already been processed.
+    Processed = -3
+
+    // 1+ - This is a node which has this many unprocessed operands.
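+    // For example (illustrative): a node with two unprocessed operands has
+    // NodeId 2, dropping to ReadyToProcess (0) as they are processed.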
+  };
+private:
+  enum LegalizeAction {
+    Legal,           // The target natively supports this type.
+    PromoteInteger,  // Replace this integer type with a larger one.
+    ExpandInteger,   // Split this integer type into two of half the size.
+    SoftenFloat,     // Convert this float type to a same size integer type.
+    ExpandFloat,     // Split this float type into two of half the size.
+    ScalarizeVector, // Replace this one-element vector with its element type.
+    SplitVector,     // Split this vector type into two of half the size.
+    WidenVector      // This vector type should be widened into a larger vector.
+  };
+
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// simple value type, where the two bits correspond to the LegalizeAction
+  /// enum from TargetLowering.  This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// getTypeAction - Return how we should legalize values of this type.
+  LegalizeAction getTypeAction(EVT VT) const {
+    switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) {
+    default:
+      assert(false && "Unknown legalize action!");
+    case TargetLowering::Legal:
+      return Legal;
+    case TargetLowering::Promote:
+      // Promote can mean
+      //   1) For integers, use a larger integer type (e.g. i8 -> i32).
+      //   2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
+      if (!VT.isVector())
+        return PromoteInteger;
+      else
+        return WidenVector;
+    case TargetLowering::Expand:
+      // Expand can mean
+      // 1) split scalar in half, 2) convert a float to an integer,
+      // 3) scalarize a single-element vector, 4) split a vector in two.
+      if (!VT.isVector()) {
+        if (VT.isInteger())
+          return ExpandInteger;
+        else if (VT.getSizeInBits() ==
+                 TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
+          return SoftenFloat;
+        else
+          return ExpandFloat;
+      } else if (VT.getVectorNumElements() == 1) {
+        return ScalarizeVector;
+      } else {
+        return SplitVector;
+      }
+    }
+  }
+
+  /// isTypeLegal - Return true if this type is legal on this target.
+  bool isTypeLegal(EVT VT) const {
+    return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) == 
+            TargetLowering::Legal);
+  }
+
+  /// IgnoreNodeResults - Pretend all of this node's results are legal.
+  bool IgnoreNodeResults(SDNode *N) const {
+    return N->getOpcode() == ISD::TargetConstant;
+  }
+
+  /// PromotedIntegers - For integer nodes that are below legal width, this map
+  /// indicates what promoted value to use.
+  DenseMap<SDValue, SDValue> PromotedIntegers;
+
+  /// ExpandedIntegers - For integer values that need to be expanded, this map
+  /// indicates the two halves making up the expanded value.
+  DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+
+  /// SoftenedFloats - For floating point nodes converted to integers of
+  /// the same size, this map indicates the converted value to use.
+  DenseMap<SDValue, SDValue> SoftenedFloats;
+
+  /// ExpandedFloats - For float values that need to be expanded, this map
+  /// indicates the two halves making up the expanded value.
+  DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+
+  /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+  /// scalar value of type 'ty' to use.
+  DenseMap<SDValue, SDValue> ScalarizedVectors;
+
+  /// SplitVectors - For vector values that need to be split, this map
+  /// indicates the two halves making up the split value.
+  DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+
+  /// WidenedVectors - For vector nodes that need to be widened, indicates
+  /// the widened value to use.
+  DenseMap<SDValue, SDValue> WidenedVectors;
+
+  /// ReplacedValues - For values that have been replaced with another,
+  /// indicates the replacement value to use.
+  DenseMap<SDValue, SDValue> ReplacedValues;
+
+  /// Worklist - This defines a worklist of nodes to process.  In order to be
+  /// pushed onto this worklist, all operands of a node must have already been
+  /// processed.
+  SmallVector<SDNode*, 128> Worklist;
+
+public:
+  explicit DAGTypeLegalizer(SelectionDAG &dag)
+    : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+    ValueTypeActions(TLI.getValueTypeActions()) {
+    assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+           "Too many value types for ValueTypeActions to hold!");
+  }
+
+  /// run - This is the main entry point for the type legalizer.  This does a
+  /// top-down traversal of the dag, legalizing types as it goes.  Returns
+  /// "true" if it made any changes.
+  bool run();
+
+  void NoteDeletion(SDNode *Old, SDNode *New) {
+    ExpungeNode(Old);
+    ExpungeNode(New);
+    for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+      ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+  }
+
+private:
+  SDNode *AnalyzeNewNode(SDNode *N);
+  void AnalyzeNewValue(SDValue &Val);
+  void ExpungeNode(SDNode *N);
+  void PerformExpensiveChecks();
+  void RemapValue(SDValue &N);
+
+  // Common routines.
+  SDValue BitConvertToInteger(SDValue Op);
+  SDValue BitConvertVectorToIntegerVector(SDValue Op);
+  SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+  bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+  bool CustomWidenLowerNode(SDNode *N, EVT VT);
+  SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
+  SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+  SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+  SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+                      const SDValue *Ops, unsigned NumOps, bool isSigned,
+                      DebugLoc dl);
+  SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+  void ReplaceValueWith(SDValue From, SDValue To);
+  void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+  void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+                    SDValue &Lo, SDValue &Hi);
+
+  //===--------------------------------------------------------------------===//
+  // Integer Promotion Support: LegalizeIntegerTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetPromotedInteger - Given a processed operand Op which was promoted to a
+  /// larger integer type, this returns the promoted value.  The low bits of the
+  /// promoted value corresponding to the original type are exactly equal to Op.
+  /// The extra bits contain rubbish, so the promoted value may need to be zero-
+  /// or sign-extended from the original type before it is usable (the helpers
+  /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+  /// For example, if Op is an i16 and was promoted to an i32, then this method
+  /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+  /// 16 bits of which contain rubbish.
+  SDValue GetPromotedInteger(SDValue Op) {
+    SDValue &PromotedOp = PromotedIntegers[Op];
+    RemapValue(PromotedOp);
+    assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+    return PromotedOp;
+  }
+  void SetPromotedInteger(SDValue Op, SDValue Result);
+
+  /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
+  /// final size.
+  SDValue SExtPromotedInteger(SDValue Op) {
+    EVT OldVT = Op.getValueType();
+    DebugLoc dl = Op.getDebugLoc();
+    Op = GetPromotedInteger(Op);
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+                       DAG.getValueType(OldVT));
+  }
+
+  /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
+  /// final size.
+  SDValue ZExtPromotedInteger(SDValue Op) {
+    EVT OldVT = Op.getValueType();
+    DebugLoc dl = Op.getDebugLoc();
+    Op = GetPromotedInteger(Op);
+    return DAG.getZeroExtendInReg(Op, dl, OldVT);
+  }
+
+  // Integer Result Promotion.
+  void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+  SDValue PromoteIntRes_AssertSext(SDNode *N);
+  SDValue PromoteIntRes_AssertZext(SDNode *N);
+  SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+  SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+  SDValue PromoteIntRes_BIT_CONVERT(SDNode *N);
+  SDValue PromoteIntRes_BSWAP(SDNode *N);
+  SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+  SDValue PromoteIntRes_Constant(SDNode *N);
+  SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
+  SDValue PromoteIntRes_CTLZ(SDNode *N);
+  SDValue PromoteIntRes_CTPOP(SDNode *N);
+  SDValue PromoteIntRes_CTTZ(SDNode *N);
+  SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+  SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+  SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+  SDValue PromoteIntRes_Overflow(SDNode *N);
+  SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+  SDValue PromoteIntRes_SDIV(SDNode *N);
+  SDValue PromoteIntRes_SELECT(SDNode *N);
+  SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+  SDValue PromoteIntRes_SETCC(SDNode *N);
+  SDValue PromoteIntRes_SHL(SDNode *N);
+  SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+  SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+  SDValue PromoteIntRes_SRA(SDNode *N);
+  SDValue PromoteIntRes_SRL(SDNode *N);
+  SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+  SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+  SDValue PromoteIntRes_UDIV(SDNode *N);
+  SDValue PromoteIntRes_UNDEF(SDNode *N);
+  SDValue PromoteIntRes_VAARG(SDNode *N);
+  SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+
+  // Integer Operand Promotion.
+  bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+  SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+  SDValue PromoteIntOp_BIT_CONVERT(SDNode *N);
+  SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+  SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+  SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
+  SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
+  SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+  SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_Shift(SDNode *N);
+  SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+  SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+  SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+  SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+  SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+
+  void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+  //===--------------------------------------------------------------------===//
+  // Integer Expansion Support: LegalizeIntegerTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetExpandedInteger - Given a processed operand Op which was expanded into
+  /// two integers of half the size, this returns the two halves.  The low bits
+  /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+  /// For example, if Op is an i64 which was expanded into two i32's, then this
+  /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+  /// Op, and Hi being equal to the upper 32 bits.
+  void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+  void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+  // Integer Result Expansion.
+  void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+  void ExpandIntRes_ANY_EXTEND        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_AssertSext        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_AssertZext        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_Constant          (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_CTLZ              (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_CTPOP             (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_CTTZ              (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_LOAD          (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_SIGN_EXTEND       (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_TRUNCATE          (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_ZERO_EXTEND       (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_FP_TO_SINT        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_FP_TO_UINT        (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  void ExpandIntRes_Logical           (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_ADDSUB            (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_ADDSUBC           (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_ADDSUBE           (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_BSWAP             (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_MUL               (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_SDIV              (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_SREM              (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_UDIV              (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_UREM              (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_Shift             (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  void ExpandShiftByConstant(SDNode *N, unsigned Amt,
+                             SDValue &Lo, SDValue &Hi);
+  bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+  bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  // Integer Operand Expansion.
+  bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+  SDValue ExpandIntOp_BIT_CONVERT(SDNode *N);
+  SDValue ExpandIntOp_BR_CC(SDNode *N);
+  SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
+  SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
+  SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+  SDValue ExpandIntOp_SETCC(SDNode *N);
+  SDValue ExpandIntOp_Shift(SDNode *N);
+  SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+  SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+  SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+  SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+  SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+
+  void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+                                  ISD::CondCode &CCCode, DebugLoc dl);
+
+  //===--------------------------------------------------------------------===//
+  // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+  /// integer of the same size, this returns the integer.  The integer contains
+  /// exactly the same bits as Op - only the type changed.  For example, if Op
+  /// is an f32 which was softened to an i32, then this method returns an i32,
+  /// the bits of which coincide with those of Op.
+  SDValue GetSoftenedFloat(SDValue Op) {
+    SDValue &SoftenedOp = SoftenedFloats[Op];
+    RemapValue(SoftenedOp);
+    assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
+    return SoftenedOp;
+  }
+  void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+  // Result Float to Integer Conversion.
+  void SoftenFloatResult(SDNode *N, unsigned ResNo);
+  SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N);
+  SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+  SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+  SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue SoftenFloatRes_FABS(SDNode *N);
+  SDValue SoftenFloatRes_FADD(SDNode *N);
+  SDValue SoftenFloatRes_FCEIL(SDNode *N);
+  SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+  SDValue SoftenFloatRes_FCOS(SDNode *N);
+  SDValue SoftenFloatRes_FDIV(SDNode *N);
+  SDValue SoftenFloatRes_FEXP(SDNode *N);
+  SDValue SoftenFloatRes_FEXP2(SDNode *N);
+  SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+  SDValue SoftenFloatRes_FLOG(SDNode *N);
+  SDValue SoftenFloatRes_FLOG2(SDNode *N);
+  SDValue SoftenFloatRes_FLOG10(SDNode *N);
+  SDValue SoftenFloatRes_FMUL(SDNode *N);
+  SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+  SDValue SoftenFloatRes_FNEG(SDNode *N);
+  SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+  SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+  SDValue SoftenFloatRes_FPOW(SDNode *N);
+  SDValue SoftenFloatRes_FPOWI(SDNode *N);
+  SDValue SoftenFloatRes_FREM(SDNode *N);
+  SDValue SoftenFloatRes_FRINT(SDNode *N);
+  SDValue SoftenFloatRes_FSIN(SDNode *N);
+  SDValue SoftenFloatRes_FSQRT(SDNode *N);
+  SDValue SoftenFloatRes_FSUB(SDNode *N);
+  SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+  SDValue SoftenFloatRes_LOAD(SDNode *N);
+  SDValue SoftenFloatRes_SELECT(SDNode *N);
+  SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+  SDValue SoftenFloatRes_UNDEF(SDNode *N);
+  SDValue SoftenFloatRes_VAARG(SDNode *N);
+  SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+  // Operand Float to Integer Conversion.
+  bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+  SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N);
+  SDValue SoftenFloatOp_BR_CC(SDNode *N);
+  SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+  SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
+  SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+  SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+  SDValue SoftenFloatOp_SETCC(SDNode *N);
+  SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+  void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+                           ISD::CondCode &CCCode, DebugLoc dl);
+
+  //===--------------------------------------------------------------------===//
+  // Float Expansion Support: LegalizeFloatTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetExpandedFloat - Given a processed operand Op which was expanded into
+  /// two floating point values of half the size, this returns the two halves.
+  /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+  /// exactly equal Hi.  For example, if Op is a ppcf128 which was expanded
+  /// into two f64's, then this method returns the two f64's, with Lo being
+  /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+  void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+  void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+  // Float Result Expansion.
+  void ExpandFloatResult(SDNode *N, unsigned ResNo);
+  void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FABS      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FADD      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FCEIL     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FCOS      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FDIV      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FEXP      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FEXP2     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FFLOOR    (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FLOG      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FLOG2     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FLOG10    (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FMUL      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FNEG      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FPOW      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FPOWI     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FRINT     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FSIN      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FSQRT     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FSUB      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FTRUNC    (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_LOAD      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  // Float Operand Expansion.
+  bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+  SDValue ExpandFloatOp_BR_CC(SDNode *N);
+  SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+  SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+  SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+  SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+  SDValue ExpandFloatOp_SETCC(SDNode *N);
+  SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+  void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+                                ISD::CondCode &CCCode, DebugLoc dl);
+
+  //===--------------------------------------------------------------------===//
+  // Scalarization Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetScalarizedVector - Given a processed one-element vector Op which was
+  /// scalarized to its element type, this returns the element.  For example,
+  /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+  SDValue GetScalarizedVector(SDValue Op) {
+    SDValue &ScalarizedOp = ScalarizedVectors[Op];
+    RemapValue(ScalarizedOp);
+    assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+    return ScalarizedOp;
+  }
+  void SetScalarizedVector(SDValue Op, SDValue Result);
+
+  // Vector Result Scalarization: <1 x ty> -> ty.
+  void ScalarizeVectorResult(SDNode *N, unsigned ResNo);
+  SDValue ScalarizeVecRes_BinOp(SDNode *N);
+  SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+  SDValue ScalarizeVecRes_InregOp(SDNode *N);
+
+  SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
+  SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
+  SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+  SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+  SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+  SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+  SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+  SDValue ScalarizeVecRes_SELECT(SDNode *N);
+  SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+  SDValue ScalarizeVecRes_SETCC(SDNode *N);
+  SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+  SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+  SDValue ScalarizeVecRes_VSETCC(SDNode *N);
+
+  // Vector Operand Scalarization: <1 x ty> -> ty.
+  bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+  SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N);
+  SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Splitting Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetSplitVector - Given a processed vector Op which was split into vectors
+  /// of half the size, this method returns the halves.  The first elements of
+  /// Op coincide with the elements of Lo; the remaining elements of Op coincide
+  /// with the elements of Hi: Op is what you would get by concatenating Lo and
+  /// Hi.  For example, if Op is a v8i32 that was split into two v4i32's, then
+  /// this method returns the two v4i32's, with Lo corresponding to the first 4
+  /// elements of Op, and Hi to the last 4 elements.
+  void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+  void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+  // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+  void SplitVectorResult(SDNode *N, unsigned ResNo);
+  void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+                                  SDValue &Hi);
+
+  // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+  bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+  SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetWidenedVector - Given a processed vector Op which was widened into a
+  /// larger vector, this method returns the larger vector.  The elements of
+  /// the returned vector consist of the elements of Op followed by elements
+  /// containing rubbish.  For example, if Op is a v2i32 that was widened to a
+  /// v4i32, then this method returns a v4i32 for which the first two elements
+  /// are the same as those of Op, while the last two elements contain rubbish.
+  SDValue GetWidenedVector(SDValue Op) {
+    SDValue &WidenedOp = WidenedVectors[Op];
+    RemapValue(WidenedOp);
+    assert(WidenedOp.getNode() && "Operand wasn't widened?");
+    return WidenedOp;
+  }
+  void SetWidenedVector(SDValue Op, SDValue Result);
+
+  // Widen Vector Result Promotion.
+  void WidenVectorResult(SDNode *N, unsigned ResNo);
+  SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+  SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+  SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+  SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+  SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+  SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_LOAD(SDNode* N);
+  SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+  SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
+  SDValue WidenVecRes_SELECT(SDNode* N);
+  SDValue WidenVecRes_SELECT_CC(SDNode* N);
+  SDValue WidenVecRes_SETCC(SDNode* N);
+  SDValue WidenVecRes_UNDEF(SDNode *N);
+  SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+  SDValue WidenVecRes_VSETCC(SDNode* N);
+
+  SDValue WidenVecRes_Binary(SDNode *N);
+  SDValue WidenVecRes_Convert(SDNode *N);
+  SDValue WidenVecRes_Shift(SDNode *N);
+  SDValue WidenVecRes_Unary(SDNode *N);
+  SDValue WidenVecRes_InregOp(SDNode *N);
+
+  // Widen Vector Operand.
+  bool WidenVectorOperand(SDNode *N, unsigned OpNo);
+  SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+  SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue WidenVecOp_STORE(SDNode* N);
+
+  SDValue WidenVecOp_Convert(SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GenWidenVectorLoads - Helper function to generate a set of loads that
+  /// load a vector with a resulting wider type.  It takes:
+  ///   LdChain: list of chains for the loads to be generated.
+  ///   LD:      load to widen.
+  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                              LoadSDNode *LD);
+
+  /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+  /// loads that load a vector with a resulting wider type.  It takes:
+  ///   LdChain: list of chains for the loads to be generated.
+  ///   LD:      load to widen.
+  ///   ExtType: extension element type.
+  SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+                                 LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+  /// GenWidenVectorStores - Helper function to generate a set of stores that
+  /// store a widened vector into non-widened memory.
+  ///   StChain: list of chains for the stores we have generated.
+  ///   ST:      store of a widened value.
+  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+  /// GenWidenVectorTruncStores - Helper function to generate a set of
+  /// truncating stores that store a widened vector into non-widened memory.
+  ///   StChain: list of chains for the stores we have generated.
+  ///   ST:      store of a widened value.
+  void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+                                 StoreSDNode *ST);
+
+  /// ModifyToType - Widens or narrows a vector input to a vector of WidenVT.
+  /// The input vector must have the same element type as WidenVT.
+  SDValue ModifyToType(SDValue InOp, EVT WidenVT);
+
+
+  //===--------------------------------------------------------------------===//
+  // Generic Splitting: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use that the illegal type is split into two
+  // not necessarily identical types.  As such they can be used for splitting
+  // vectors and expanding integers and floats.
+
+  void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isVector())
+      GetSplitVector(Op, Lo, Hi);
+    else if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+  /// which is split (or expanded) into two not necessarily identical pieces.
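+  /// For example, splitting v8i32 in half gives LoVT = HiVT = v4i32, and
+  /// expanding i64 on a 32-bit target gives two i32 pieces.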
+  void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
+
+  /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+  /// high parts of the given value.
+  void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+  // Generic Result Splitting.
+  void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT_CC   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_UNDEF       (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  //===--------------------------------------------------------------------===//
+  // Generic Expansion: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use that the illegal type is split into two
+  // identical types of half the size, and that the Lo/Hi part is stored first
+  // in memory on little/big-endian machines, followed by the Hi/Lo part.  As
+  // such they can be used for expanding integers and floats.
+
+  void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  // Generic Result Expansion.
+  void ExpandRes_BIT_CONVERT       (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_BUILD_PAIR        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_EXTRACT_ELEMENT   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_NormalLoad        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_VAARG             (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  // Generic Operand Expansion.
+  SDValue ExpandOp_BIT_CONVERT      (SDNode *N);
+  SDValue ExpandOp_BUILD_VECTOR     (SDNode *N);
+  SDValue ExpandOp_EXTRACT_ELEMENT  (SDNode *N);
+  SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+  SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+  SDValue ExpandOp_NormalStore      (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 0000000..a1b6ced
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,470 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type.  The Lo/Hi part
+// is required to be stored first in memory on little/big-endian machines.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
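+//
+// For example, on a 32-bit target an illegal i64 value is expanded into
+// two i32 values, Lo and Hi; in memory the Lo half occupies the first four
+// bytes on little-endian targets and the Hi half does on big-endian ones.
+// Splitting a v4i32 value instead produces two v2i32 halves, and the Lo
+// half always comes first in memory.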
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part.  This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
+
+void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
+                                             SDValue &Hi) {
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  SDValue InOp = N->getOperand(0);
+  EVT InVT = InOp.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Handle some special cases efficiently.
+  switch (getTypeAction(InVT)) {
+    default:
+      assert(false && "Unknown type action!");
+    case Legal:
+    case PromoteInteger:
+      break;
+    case SoftenFloat:
+      // Convert the integer operand instead.
+      SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      return;
+    case ExpandInteger:
+    case ExpandFloat:
+      // Convert the expanded pieces of the input.
+      GetExpandedOp(InOp, Lo, Hi);
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      return;
+    case SplitVector:
+      GetSplitVector(InOp, Lo, Hi);
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      return;
+    case ScalarizeVector:
+      // Convert the element instead.
+      SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      return;
+    case WidenVector: {
+      assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+      InOp = GetWidenedVector(InOp);
+      EVT InNVT = EVT::getVectorVT(*DAG.getContext(),
+                                   InVT.getVectorElementType(),
+                                   InVT.getVectorNumElements()/2);
+      Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                       DAG.getIntPtrConstant(0));
+      Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                       DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      return;
+    }
+  }
+
+  if (InVT.isVector() && OutVT.isInteger()) {
+    // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
+    // is legal but the result is not.
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
+
+    if (isTypeLegal(NVT)) {
+      SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
+      Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+                       DAG.getIntPtrConstant(0));
+      Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+                       DAG.getIntPtrConstant(1));
+
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+
+      return;
+    }
+  }
+
+  // Lower the bit-convert to a store/load from the stack.
+  assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+  // Create the stack frame object.  Make sure it is aligned for both
+  // the source and expanded destination types.
+  unsigned Alignment = TLI.getTargetData()->
+    getPrefTypeAlignment(NOutVT.getTypeForEVT(*DAG.getContext()));
+  SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0);
+
+  // Load the first half from the stack slot.
+  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0);
+
+  // Increment the pointer to the other half.
+  unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+  StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                         DAG.getIntPtrConstant(IncrementSize));
+
+  // Load the second half from the stack slot.
+  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+                   MinAlign(Alignment, IncrementSize));
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  // Return the operands.
+  Lo = N->getOperand(0);
+  Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  GetExpandedOp(N->getOperand(0), Lo, Hi);
+  SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+                   Hi : Lo;
+
+  assert(Part.getValueType() == N->getValueType(0) &&
+         "Type twice as big as expanded type not itself expanded!");
+
+  GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+                                                    SDValue &Hi) {
+  SDValue OldVec = N->getOperand(0);
+  unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Convert to a vector of the expanded element type, for example
+  // <3 x i64> -> <6 x i32>.
+  EVT OldVT = N->getValueType(0);
+  EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+                               EVT::getVectorVT(*DAG.getContext(), NewVT,
+                                                2*OldElts),
+                               OldVec);
+
+  // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+  SDValue Idx = N->getOperand(1);
+
+  // Make sure the type of Idx is big enough to hold the new values.
+  if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+  Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+                    DAG.getConstant(1, Idx.getValueType()));
+  Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+  DebugLoc dl = N->getDebugLoc();
+
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  SDValue Chain = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  int SVOffset = LD->getSrcValueOffset();
+  unsigned Alignment = LD->getAlignment();
+  bool isVolatile = LD->isVolatile();
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+                   isVolatile, Alignment);
+
+  // Increment the pointer to the other half.
+  unsigned IncrementSize = NVT.getSizeInBits() / 8;
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
+                   SVOffset+IncrementSize,
+                   isVolatile, MinAlign(Alignment, IncrementSize));
+
+  // Build a factor node to remember that this load is independent of the
+  // other one.
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                      Hi.getValue(1));
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Chain = N->getOperand(0);
+  SDValue Ptr = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+
+  Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+  Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2));
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  if (N->getValueType(0).isVector()) {
+    // An illegal expanding type is being converted to a legal vector type.
+    // Make a two element vector out of the expanded parts and convert that
+    // instead, but only if the new vector type is legal (otherwise there
+    // is no point, and it might create expansion loops).  For example, on
+    // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
+    EVT OVT = N->getOperand(0).getValueType();
+    EVT NVT =
+      EVT::getVectorVT(*DAG.getContext(),
+                       TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2);
+
+    if (isTypeLegal(NVT)) {
+      SDValue Parts[2];
+      GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+      if (TLI.isBigEndian())
+        std::swap(Parts[0], Parts[1]);
+
+      SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+    }
+  }
+
+  // Otherwise, store to a temporary and load out again as the new type.
+  return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+  // The vector type is legal but the element type needs expansion.
+  EVT VecVT = N->getValueType(0);
+  unsigned NumElts = VecVT.getVectorNumElements();
+  EVT OldVT = N->getOperand(0).getValueType();
+  EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+  DebugLoc dl = N->getDebugLoc();
+
+  assert(OldVT == VecVT.getVectorElementType() &&
+         "BUILD_VECTOR operand type doesn't match vector element type!");
+
+  // Build a vector of twice the length out of the expanded elements.
+  // For example <3 x i64> -> <6 x i32>.
+  std::vector<SDValue> NewElts;
+  NewElts.reserve(NumElts*2);
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Lo, Hi;
+    GetExpandedOp(N->getOperand(i), Lo, Hi);
+    if (TLI.isBigEndian())
+      std::swap(Lo, Hi);
+    NewElts.push_back(Lo);
+    NewElts.push_back(Hi);
+  }
+
+  SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                               EVT::getVectorVT(*DAG.getContext(), NewVT,
+                                                NewElts.size()),
+                               &NewElts[0], NewElts.size());
+
+  // Convert the new vector to the old vector type.
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+  SDValue Lo, Hi;
+  GetExpandedOp(N->getOperand(0), Lo, Hi);
+  return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+  // The vector type is legal but the element type needs expansion.
+  EVT VecVT = N->getValueType(0);
+  unsigned NumElts = VecVT.getVectorNumElements();
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue Val = N->getOperand(1);
+  EVT OldEVT = Val.getValueType();
+  EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+  assert(OldEVT == VecVT.getVectorElementType() &&
+         "Inserted element type doesn't match vector element type!");
+
+  // Bitconvert to a vector of twice the length with elements of the expanded
+  // type, insert the expanded vector elements, and then convert back.
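+  // For example, on a target where v2i64 is legal but i64 is not, inserting
+  // an i64 element into a v2i64 becomes two i32 inserts into a v4i32, at
+  // indices 2*Idx and 2*Idx+1.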
+  EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+                               NewVecVT, N->getOperand(0));
+
+  SDValue Lo, Hi;
+  GetExpandedOp(Val, Lo, Hi);
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  SDValue Idx = N->getOperand(2);
+  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+  Idx = DAG.getNode(ISD::ADD, dl,
+                    Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+  // Convert the new vector to the old vector type.
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+         "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(NumElts);
+  Ops[0] = N->getOperand(0);
+  SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+  for (unsigned i = 1; i < NumElts; ++i)
+    Ops[i] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+  assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+  DebugLoc dl = N->getDebugLoc();
+
+  StoreSDNode *St = cast<StoreSDNode>(N);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                     St->getValue().getValueType());
+  SDValue Chain = St->getChain();
+  SDValue Ptr = St->getBasePtr();
+  int SVOffset = St->getSrcValueOffset();
+  unsigned Alignment = St->getAlignment();
+  bool isVolatile = St->isVolatile();
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+  unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+  SDValue Lo, Hi;
+  GetExpandedOp(St->getValue(), Lo, Hi);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+                    isVolatile, Alignment);
+
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+  Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
+                    SVOffset + IncrementSize,
+                    isVolatile, MinAlign(Alignment, IncrementSize));
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
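+//
+// For example, the two v2i32 halves of a split v4i32 are laid out Lo then
+// Hi on every target, whereas the two i32 halves of an expanded i64 are
+// laid out Hi then Lo on big-endian targets.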
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  // A MERGE_VALUES node can produce any number of values.  We know that the
+  // first illegal one needs to be expanded into Lo/Hi.
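+  // For example, given results (i32, i64, i32) on a 32-bit target, the
+  // leading i32 is replaced by its operand, the i64 is split into Lo/Hi
+  // here, and the trailing i32 is replaced by its operand below.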
+  unsigned i;
+
+  // Each result in the leading run of legal results is replaced by the
+  // corresponding input operand, which has the same type.
+  for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
+    ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+
+  // The first illegal result must be the one that needs to be expanded.
+  GetSplitOp(N->getOperand(i), Lo, Hi);
+
+  // Legalize the rest of the results into the input operands whether they are
+  // legal or not.
+  unsigned e = N->getNumValues();
+  for (++i; i != e; ++i)
+    ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
+                                       SDValue &Hi) {
+  SDValue LL, LH, RL, RH;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitOp(N->getOperand(1), LL, LH);
+  GetSplitOp(N->getOperand(2), RL, RH);
+
+  SDValue Cond = N->getOperand(0);
+  Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL);
+  Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+                                          SDValue &Hi) {
+  SDValue LL, LH, RL, RH;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitOp(N->getOperand(2), LL, LH);
+  GetSplitOp(N->getOperand(3), RL, RH);
+
+  Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+                   N->getOperand(1), LL, RL, N->getOperand(4));
+  Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+                   N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  Lo = DAG.getUNDEF(LoVT);
+  Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 0000000..b5f84c0
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,290 @@
+//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types.  For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
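+//
+// As a sketch of what unrolling means for the v2i64 SDIV example above:
+// each pair of elements is extracted with EXTRACT_VECTOR_ELT, divided with
+// a scalar i64 SDIV, and the results are recombined with BUILD_VECTOR; the
+// illegal i64 operations are then expanded by the type legalizer.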
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+  SelectionDAG& DAG;
+  TargetLowering& TLI;
+  bool Changed; // Keep track of whether anything changed
+
+  /// LegalizedNodes - For nodes that are of legal width, and that have more
+  /// than one use, this map indicates what regularized operand to use.  This
+  /// allows us to avoid legalizing the same thing more than once.
+  DenseMap<SDValue, SDValue> LegalizedNodes;
+
+  // Adds a node to the translation cache
+  void AddLegalizedOperand(SDValue From, SDValue To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+
+  // Legalizes the given node
+  SDValue LegalizeOp(SDValue Op);
+  // Assuming the node is legal, "legalize" the results
+  SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+  // Implements unrolling a VSETCC.
+  SDValue UnrollVSETCC(SDValue Op);
+  // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+  // isn't legal.
+  SDValue ExpandFNEG(SDValue Op);
+  // Implements vector promotion; this is essentially just bitcasting the
+  // operands to a different type and bitcasting the result back to the
+  // original type.
+  SDValue PromoteVectorOp(SDValue Op);
+
+public:
+  bool Run();
+  VectorLegalizer(SelectionDAG& dag) :
+      DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+  // The legalize process is inherently a bottom-up recursive process (users
+  // legalize their uses before themselves).  Given infinite stack space, we
+  // could just start legalizing on the root and traverse the whole graph.  In
+  // practice however, this causes us to run out of stack space on large basic
+  // blocks.  To avoid this problem, compute an ordering of the nodes where each
+  // node is only legalized after all of its operands are legalized.
+  DAG.AssignTopologicalOrder();
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+    LegalizeOp(SDValue(I, 0));
+
+  // Finally, it's possible the root changed.  Get the new root.
+  SDValue OldRoot = DAG.getRoot();
+  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+  DAG.setRoot(LegalizedNodes[OldRoot]);
+
+  LegalizedNodes.clear();
+
+  // Remove dead nodes now.
+  DAG.RemoveDeadNodes();
+
+  return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+  // Generic legalization: just pass the operand through.
+  for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+    AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+  return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+  if (I != LegalizedNodes.end()) return I->second;
+
+  SDNode* Node = Op.getNode();
+
+  // Legalize the operands
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+    Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+  SDValue Result =
+      DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size());
+
+  bool HasVectorValue = false;
+  for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+       J != E;
+       ++J)
+    HasVectorValue |= J->isVector();
+  if (!HasVectorValue)
+    return TranslateLegalizeResults(Op, Result);
+
+  EVT QueryType;
+  switch (Op.getOpcode()) {
+  default:
+    return TranslateLegalizeResults(Op, Result);
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::SELECT:
+  case ISD::SELECT_CC:
+  case ISD::VSETCC:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+  case ISD::TRUNCATE:
+  case ISD::SIGN_EXTEND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+  case ISD::FPOWI:
+  case ISD::FPOW:
+  case ISD::FLOG:
+  case ISD::FLOG2:
+  case ISD::FLOG10:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FCEIL:
+  case ISD::FTRUNC:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT:
+  case ISD::FFLOOR:
+    QueryType = Node->getValueType(0);
+    break;
+  case ISD::SIGN_EXTEND_INREG:
+  case ISD::FP_ROUND_INREG:
+    QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    QueryType = Node->getOperand(0).getValueType();
+    break;
+  }
+
+  switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+  case TargetLowering::Promote:
+    // "Promote" the operation by bitcasting
+    Result = PromoteVectorOp(Op);
+    Changed = true;
+    break;
+  case TargetLowering::Legal: break;
+  case TargetLowering::Custom: {
+    SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+    if (Tmp1.getNode()) {
+      Result = Tmp1;
+      break;
+    }
+    // FALL THROUGH
+  }
+  case TargetLowering::Expand:
+    if (Node->getOpcode() == ISD::FNEG)
+      Result = ExpandFNEG(Op);
+    else if (Node->getOpcode() == ISD::VSETCC)
+      Result = UnrollVSETCC(Op);
+    else
+      Result = DAG.UnrollVectorOp(Op.getNode());
+    break;
+  }
+
+  // Make sure that the generated code is itself legal.
+  if (Result != Op) {
+    Result = LegalizeOp(Result);
+    Changed = true;
+  }
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
+  return Result;
+}
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+  // Vector "promotion" is basically just bitcasting and doing the operation
+  // in a different type.  For example, x86 promotes ISD::AND on v2i32 to
+  // v1i64.
+  EVT VT = Op.getValueType();
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "Can't promote a vector with multiple results!");
+  EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+  DebugLoc dl = Op.getDebugLoc();
+  SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+    if (Op.getOperand(j).getValueType().isVector())
+      Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+    else
+      Operands[j] = Op.getOperand(j);
+  }
+
+  Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+}
+
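+// ExpandFNEG - Expand FNEG as FSUB(-0.0, X) when vector FSUB is legal or
+// custom, since IEEE negation of X is exactly -0.0 - X; otherwise fall
+// back to unrolling the operation.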
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+    SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                       Zero, Op.getOperand(0));
+  }
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
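+// UnrollVSETCC - Compare each pair of elements with a scalar SETCC, then
+// use a SELECT to produce the all-ones or all-zeros element value that
+// VSETCC defines for true and false results.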
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+  EVT VT = Op.getValueType();
+  unsigned NumElems = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+  DebugLoc dl = Op.getDebugLoc();
+  SmallVector<SDValue, 8> Ops(NumElems);
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+                                  DAG.getIntPtrConstant(i));
+    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+                                  DAG.getIntPtrConstant(i));
+    Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+                         LHSElem, RHSElem, CC);
+    Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+                         DAG.getConstant(APInt::getAllOnesValue
+                                         (EltVT.getSizeInBits()), EltVT),
+                         DAG.getConstant(0, EltVT));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+} // end anonymous namespace
+
+bool SelectionDAG::LegalizeVectors() {
+  return VectorLegalizer(*this).Run();
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 0000000..bf95bb5
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2364 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type.  For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register.  This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size.  For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue R = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to scalarize the result of this operator!");
+
+  case ISD::BIT_CONVERT:       R = ScalarizeVecRes_BIT_CONVERT(N); break;
+  case ISD::BUILD_VECTOR:      R = N->getOperand(0); break;
+  case ISD::CONVERT_RNDSAT:    R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+  case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FP_ROUND_INREG:    R = ScalarizeVecRes_InregOp(N); break;
+  case ISD::FPOWI:             R = ScalarizeVecRes_FPOWI(N); break;
+  case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::LOAD:           R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+  case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+  case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+  case ISD::SELECT:            R = ScalarizeVecRes_SELECT(N); break;
+  case ISD::SELECT_CC:         R = ScalarizeVecRes_SELECT_CC(N); break;
+  case ISD::SETCC:             R = ScalarizeVecRes_SETCC(N); break;
+  case ISD::UNDEF:             R = ScalarizeVecRes_UNDEF(N); break;
+  case ISD::VECTOR_SHUFFLE:    R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+  case ISD::VSETCC:            R = ScalarizeVecRes_VSETCC(N); break;
+
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::FABS:
+  case ISD::FCOS:
+  case ISD::FNEG:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+  case ISD::FTRUNC:
+  case ISD::FFLOOR:
+  case ISD::FCEIL:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT:
+  case ISD::UINT_TO_FP:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+    R = ScalarizeVecRes_UnaryOp(N);
+    break;
+
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::FADD:
+  case ISD::FDIV:
+  case ISD::FMUL:
+  case ISD::FPOW:
+  case ISD::FREM:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::OR:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::SUB:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    R = ScalarizeVecRes_BinOp(N);
+    break;
+  }
+
+  // If R is null, the sub-method took care of registering the result.
+  if (R.getNode())
+    SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+  EVT NewVT = N->getValueType(0).getVectorElementType();
+  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                     NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+  EVT NewVT = N->getValueType(0).getVectorElementType();
+  SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+  return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+                              Op0, DAG.getValueType(NewVT),
+                              DAG.getValueType(Op0.getValueType()),
+                              N->getOperand(3),
+                              N->getOperand(4),
+                              cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+                     N->getValueType(0).getVectorElementType(),
+                     N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+  SDValue Op = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+                     Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+  // The value to insert may have a wider type than the vector element type,
+  // so be sure to truncate it to the element type if necessary.
+  SDValue Op = N->getOperand(1);
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  if (Op.getValueType() != EltVT)
+    // FIXME: Can this happen for floating point types?
+    Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+  return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+  assert(N->isUnindexed() && "Indexed vector load?");
+
+  SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(),
+                               N->getExtensionType(),
+                               N->getValueType(0).getVectorElementType(),
+                               N->getChain(), N->getBasePtr(),
+                               DAG.getUNDEF(N->getBasePtr().getValueType()),
+                               N->getSrcValue(), N->getSrcValueOffset(),
+                               N->getMemoryVT().getVectorElementType(),
+                               N->isVolatile(), N->getOriginalAlignment());
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+  // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+  EVT DestVT = N->getValueType(0).getVectorElementType();
+  SDValue Op = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+                     LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  // If the operand is wider than the vector element type then it is implicitly
+  // truncated.  Make that explicit here.
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  SDValue InOp = N->getOperand(0);
+  if (InOp.getValueType() != EltVT)
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+  return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0), LHS,
+                     GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(2));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+                     N->getOperand(0), N->getOperand(1),
+                     LHS, GetScalarizedVector(N->getOperand(3)),
+                     N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+  DebugLoc DL = N->getDebugLoc();
+
+  // Turn it into a scalar SETCC.
+  return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+  // Figure out if the scalar is the LHS or RHS and return it.
+  SDValue Arg = N->getOperand(2).getOperand(0);
+  if (Arg.getOpcode() == ISD::UNDEF)
+    return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+  unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+  return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+  EVT NVT = N->getValueType(0).getVectorElementType();
+  EVT SVT = TLI.getSetCCResultType(LHS.getValueType());
+  DebugLoc DL = N->getDebugLoc();
+
+  // Turn it into a scalar SETCC.
+  SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2));
+
+  // VSETCC always returns a sign-extended value, while SETCC may not.  The
+  // SETCC result type may not match the vector element type.  Correct these.
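+  // For example, if SETCC produces a zero-or-one i8 and the element type is
+  // i32, the i8 result is truncated to i1 and then sign-extended, yielding
+  // the required 0 or -1 in i32.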
+  if (NVT.bitsLE(SVT)) {
+    // The SETCC result type is bigger than the vector element type.
+    // Ensure the SETCC result is sign-extended.
+    if (TLI.getBooleanContents() !=
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+      Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res,
+                        DAG.getValueType(MVT::i1));
+    // Truncate to the final type.
+    return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res);
+  }
+
+  // The SETCC result type is smaller than the vector element type.
+  // If the SetCC result is not sign-extended, chop it down to MVT::i1.
+  if (TLI.getBooleanContents() !=
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+    Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res);
+  // Sign extend to the final type.
+  return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res);
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (Res.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+      N->dump(&DAG);
+      dbgs() << "\n";
+#endif
+      llvm_unreachable("Do not know how to scalarize this operator's operand!");
+    case ISD::BIT_CONVERT:
+      Res = ScalarizeVecOp_BIT_CONVERT(N);
+      break;
+    case ISD::CONCAT_VECTORS:
+      Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+      break;
+    case ISD::EXTRACT_VECTOR_ELT:
+      Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+      break;
+    case ISD::STORE:
+      Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+      break;
+    }
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>.  Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+  SDValue Elt = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                     N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+  SmallVector<SDValue, 8> Ops(N->getNumOperands());
+  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+    Ops[i] = GetScalarizedVector(N->getOperand(i));
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+                     &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue Res = GetScalarizedVector(N->getOperand(0));
+  if (Res.getValueType() != N->getValueType(0))
+    Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+                      Res);
+  return Res;
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>.  Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+  assert(N->isUnindexed() && "Indexed store of one-element vector?");
+  assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+  DebugLoc dl = N->getDebugLoc();
+
+  if (N->isTruncatingStore())
+    return DAG.getTruncStore(N->getChain(), dl,
+                             GetScalarizedVector(N->getOperand(1)),
+                             N->getBasePtr(),
+                             N->getSrcValue(), N->getSrcValueOffset(),
+                             N->getMemoryVT().getVectorElementType(),
+                             N->isVolatile(), N->getAlignment());
+
+  return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+                      N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+                      N->isVolatile(), N->getOriginalAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting.  At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization; we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Split node result: ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Lo, Hi;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "SplitVectorResult #" << ResNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to split the result of this operator!");
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+
+  case ISD::BIT_CONVERT:       SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BUILD_VECTOR:      SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+  case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+  case ISD::CONVERT_RNDSAT:    SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
+  case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+  case ISD::FP_ROUND_INREG:    SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
+  case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::SCALAR_TO_VECTOR:  SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::LOAD:
+    SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+    break;
+  case ISD::SETCC:
+  case ISD::VSETCC:
+    SplitVecRes_SETCC(N, Lo, Hi);
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+    break;
+
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+  case ISD::FTRUNC:
+  case ISD::FFLOOR:
+  case ISD::FCEIL:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+    SplitVecRes_UnaryOp(N, Lo, Hi);
+    break;
+
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::FDIV:
+  case ISD::FPOW:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::UREM:
+  case ISD::SREM:
+  case ISD::FREM:
+    SplitVecRes_BinOp(N, Lo, Hi);
+    break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
+  if (Lo.getNode())
+    SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+                                         SDValue &Hi) {
+  SDValue LHSLo, LHSHi;
+  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  SDValue RHSLo, RHSHi;
+  GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+  DebugLoc dl = N->getDebugLoc();
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  // We know the result is a vector.  The input may be either a vector or a
+  // scalar value.
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue InOp = N->getOperand(0);
+  EVT InVT = InOp.getValueType();
+
+  // Handle some special cases efficiently.
+  switch (getTypeAction(InVT)) {
+  default:
+    assert(false && "Unknown type action!");
+  case Legal:
+  case PromoteInteger:
+  case SoftenFloat:
+  case ScalarizeVector:
+    break;
+  case ExpandInteger:
+  case ExpandFloat:
+    // A scalar to vector conversion, where the scalar needs expansion.
+    // If the vector is being split in two then we can just convert the
+    // expanded pieces.
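+    // For example, splitting v2f32 = BIT_CONVERT i64 on a 32-bit target
+    // turns the two expanded i32 halves into v1f32 = BIT_CONVERT i32 nodes.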
+    if (LoVT == HiVT) {
+      GetExpandedOp(InOp, Lo, Hi);
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+      return;
+    }
+    break;
+  case SplitVector:
+    // If the input is a vector that needs to be split, convert each split
+    // piece of the input now.
+    GetSplitVector(InOp, Lo, Hi);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+    Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+    return;
+  }
+
+  // In the general case, convert the input to an integer and split it by hand.
+  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+  if (TLI.isBigEndian())
+    std::swap(LoIntVT, HiIntVT);
+
+  SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+  Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+  Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+                                                SDValue &Hi) {
+  EVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  unsigned LoNumElts = LoVT.getVectorNumElements();
+  SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+  Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+  SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+  Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+  DebugLoc dl = N->getDebugLoc();
+  unsigned NumSubvectors = N->getNumOperands() / 2;
+  if (NumSubvectors == 1) {
+    Lo = N->getOperand(0);
+    Hi = N->getOperand(1);
+    return;
+  }
+
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+  Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+  SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+  Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  EVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  SDValue DTyOpLo = DAG.getValueType(LoVT);
+  SDValue DTyOpHi = DAG.getValueType(HiVT);
+
+  SDValue RndOp = N->getOperand(3);
+  SDValue SatOp = N->getOperand(4);
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+  // Split the input.
+  SDValue VLo, VHi;
+  EVT InVT = N->getOperand(0).getValueType();
+  switch (getTypeAction(InVT)) {
+  default: llvm_unreachable("Unexpected type action!");
+  case Legal: {
+    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                                 LoVT.getVectorNumElements());
+    VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                      DAG.getIntPtrConstant(0));
+    VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                      DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    break;
+  }
+  case SplitVector:
+    GetSplitVector(N->getOperand(0), VLo, VHi);
+    break;
+  case WidenVector: {
+    // If the result needs to be split and the input needs to be widened,
+    // the two types must have different lengths. Use the widened result
+    // and extract from it to do the split.
+    SDValue InOp = GetWidenedVector(N->getOperand(0));
+    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                                 LoVT.getVectorNumElements());
+    VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                      DAG.getIntPtrConstant(0));
+    VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                      DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    break;
+  }
+  }
+
+  SDValue STyOpLo = DAG.getValueType(VLo.getValueType());
+  SDValue STyOpHi = DAG.getValueType(VHi.getValueType());
+
+  Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp,
+                            CvtCode);
+  Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp,
+                            CvtCode);
+}
+
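+/// SplitVecRes_EXTRACT_SUBVECTOR - Split an extracted subvector into two
+/// smaller extracts: Lo at the original index, and Hi at the index advanced
+/// by the number of elements in Lo.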
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+                                                     SDValue &Hi) {
+  SDValue Vec = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+  EVT IdxVT = Idx.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+  Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
+                    DAG.getConstant(LoVT.getVectorNumElements(), IdxVT));
+  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx);
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+                                         SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+  Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue LHSLo, LHSHi;
+  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+                   DAG.getValueType(LoVT));
+  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+                   DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+                                                     SDValue &Hi) {
+  SDValue Vec = N->getOperand(0);
+  SDValue Elt = N->getOperand(1);
+  SDValue Idx = N->getOperand(2);
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitVector(Vec, Lo, Hi);
+
+  if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+    unsigned IdxVal = CIdx->getZExtValue();
+    unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+    if (IdxVal < LoNumElts)
+      Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+                       Lo.getValueType(), Lo, Elt, Idx);
+    else
+      Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+                       DAG.getIntPtrConstant(IdxVal - LoNumElts));
+    return;
+  }
+
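+  // The index is not a constant, so it may address either half and the
+  // insertion cannot be resolved statically.  Go through memory instead.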
+  // Spill the vector to the stack.
+  EVT VecVT = Vec.getValueType();
+  EVT EltVT = VecVT.getVectorElementType();
+  SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+  // Store the new element.  This may be larger than the vector element type,
+  // so use a truncating store.
+  SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+  const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+  unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(VecType);
+  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT);
+
+  // Load the Lo part from the stack slot.
+  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0);
+
+  // Increment the pointer to the other part.
+  unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+  StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                         DAG.getIntPtrConstant(IncrementSize));
+
+  // Load the Hi part from the stack slot.
+  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
+                   MinAlign(Alignment, IncrementSize));
+}
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+                                                    SDValue &Hi) {
+  EVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+  Hi = DAG.getUNDEF(HiVT);
+}
+
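+/// SplitVecRes_LOAD - Split a vector load into two loads of the half-sized
+/// memory types, the second from the base pointer advanced by the size of
+/// the low half; the chains are then tied back together with a TokenFactor.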
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+                                        SDValue &Hi) {
+  assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+  EVT LoVT, HiVT;
+  DebugLoc dl = LD->getDebugLoc();
+  GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  SDValue Ch = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+  const Value *SV = LD->getSrcValue();
+  int SVOffset = LD->getSrcValueOffset();
+  EVT MemoryVT = LD->getMemoryVT();
+  unsigned Alignment = LD->getOriginalAlignment();
+  bool isVolatile = LD->isVolatile();
+
+  EVT LoMemVT, HiMemVT;
+  GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+  Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset,
+                   SV, SVOffset, LoMemVT, isVolatile, Alignment);
+
+  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  SVOffset += IncrementSize;
+  Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset,
+                   SV, SVOffset, HiMemVT, isVolatile, Alignment);
+
+  // Build a factor node to remember that this load is independent of the
+  // other one.
+  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                   Hi.getValue(1));
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  EVT LoVT, HiVT;
+  DebugLoc DL = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  // Split the input.
+  EVT InVT = N->getOperand(0).getValueType();
+  SDValue LL, LH, RL, RH;
+  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                               LoVT.getVectorNumElements());
+  LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+                   DAG.getIntPtrConstant(0));
+  LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+                   DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+  RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+                   DAG.getIntPtrConstant(0));
+  RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+                   DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  // Get the dest types - they may not match the input types, e.g. int_to_fp.
+  EVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  // Split the input.
+  EVT InVT = N->getOperand(0).getValueType();
+  switch (getTypeAction(InVT)) {
+  default: llvm_unreachable("Unexpected type action!");
+  case Legal: {
+    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                                 LoVT.getVectorNumElements());
+    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                     DAG.getIntPtrConstant(0));
+    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                     DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    break;
+  }
+  case SplitVector:
+    GetSplitVector(N->getOperand(0), Lo, Hi);
+    break;
+  case WidenVector: {
+    // If the result needs to be split and the input needs to be widened,
+    // the two types must have different lengths.  Use the widened input
+    // and extract from it to do the split.
+    SDValue InOp = GetWidenedVector(N->getOperand(0));
+    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                                 LoVT.getVectorNumElements());
+    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                     DAG.getIntPtrConstant(0));
+    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                     DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    break;
+  }
+  }
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+  Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+}
+
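+/// SplitVecRes_VECTOR_SHUFFLE - Split a shuffle by first splitting both of
+/// its operands, giving four half-sized input vectors, and then rebuilding
+/// each half of the result from at most two of those inputs.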
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+                                                  SDValue &Lo, SDValue &Hi) {
+  // The low and high parts of the original input give four input vectors.
+  SDValue Inputs[4];
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+  GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+  EVT NewVT = Inputs[0].getValueType();
+  unsigned NewElts = NewVT.getVectorNumElements();
+
+  // If Lo or Hi uses elements from at most two of the four input vectors, then
+  // express it as a vector shuffle of those two inputs.  Otherwise extract the
+  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+  SmallVector<int, 16> Ops;
+  for (unsigned High = 0; High < 2; ++High) {
+    SDValue &Output = High ? Hi : Lo;
+
+    // Build a shuffle mask for the output, discovering on the fly which
+    // input vectors to use as shuffle operands (recorded in InputUsed).
+    // If building a suitable shuffle vector proves too hard, then bail
+    // out with useBuildVector set.
+    unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+    unsigned FirstMaskIdx = High * NewElts;
+    bool useBuildVector = false;
+    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+      // The mask element.  This indexes into the input.
+      int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+      // The input vector this mask element indexes into.
+      unsigned Input = (unsigned)Idx / NewElts;
+
+      if (Input >= array_lengthof(Inputs)) {
+        // The mask element does not index into any input vector.
+        Ops.push_back(-1);
+        continue;
+      }
+
+      // Turn the index into an offset from the start of the input vector.
+      Idx -= Input * NewElts;
+
+      // Find or create a shuffle vector operand to hold this input.
+      unsigned OpNo;
+      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+        if (InputUsed[OpNo] == Input) {
+          // This input vector is already an operand.
+          break;
+        } else if (InputUsed[OpNo] == -1U) {
+          // Create a new operand for this input vector.
+          InputUsed[OpNo] = Input;
+          break;
+        }
+      }
+
+      if (OpNo >= array_lengthof(InputUsed)) {
+        // More than two input vectors used!  Give up on trying to create a
+        // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
+        useBuildVector = true;
+        break;
+      }
+
+      // Add the mask index for the new shuffle vector.
+      Ops.push_back(Idx + OpNo * NewElts);
+    }
+
+    if (useBuildVector) {
+      EVT EltVT = NewVT.getVectorElementType();
+      SmallVector<SDValue, 16> SVOps;
+
+      // Extract the input elements by hand.
+      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+        // The mask element.  This indexes into the input.
+        int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+        // The input vector this mask element indexes into.
+        unsigned Input = (unsigned)Idx / NewElts;
+
+        if (Input >= array_lengthof(Inputs)) {
+          // The mask element is "undef" or indexes off the end of the input.
+          SVOps.push_back(DAG.getUNDEF(EltVT));
+          continue;
+        }
+
+        // Turn the index into an offset from the start of the input vector.
+        Idx -= Input * NewElts;
+
+        // Extract the vector element by hand.
+        SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                    Inputs[Input], DAG.getIntPtrConstant(Idx)));
+      }
+
+      // Construct the Lo/Hi output using a BUILD_VECTOR.
+      Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+                           &SVOps[0], SVOps.size());
+    } else if (InputUsed[0] == -1U) {
+      // No input vectors were used!  The result is undefined.
+      Output = DAG.getUNDEF(NewVT);
+    } else {
+      SDValue Op0 = Inputs[InputUsed[0]];
+      // If only one input was used, use an undefined vector for the other.
+      SDValue Op1 = InputUsed[1] == -1U ?
+        DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+      // At least one input vector was used.  Create a new shuffle vector.
+      Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+    }
+
+    Ops.clear();
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting.  At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Split node operand: ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (Res.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+      N->dump(&DAG);
+      dbgs() << "\n";
+#endif
+      llvm_unreachable("Do not know how to split this operator's operand!");
+
+    case ISD::BIT_CONVERT:       Res = SplitVecOp_BIT_CONVERT(N); break;
+    case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+    case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+    case ISD::STORE:
+      Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+      break;
+
+    case ISD::CTTZ:
+    case ISD::CTLZ:
+    case ISD::CTPOP:
+    case ISD::FP_TO_SINT:
+    case ISD::FP_TO_UINT:
+    case ISD::SINT_TO_FP:
+    case ISD::UINT_TO_FP:
+    case ISD::TRUNCATE:
+    case ISD::SIGN_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::ANY_EXTEND:
+      Res = SplitVecOp_UnaryOp(N);
+      break;
+    }
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
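+/// SplitVecOp_UnaryOp - Handle a unary operator whose result type is legal
+/// but whose vector operand must be split, e.g. truncating an illegal v8i32
+/// to a legal v8i16: each v4i32 half is truncated to v4i16 and the halves
+/// are concatenated back together.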
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+  // The result has a legal vector type, but the input needs splitting.
+  EVT ResVT = N->getValueType(0);
+  SDValue Lo, Hi;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  EVT InVT = Lo.getValueType();
+
+  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+                               InVT.getVectorNumElements());
+
+  Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+  Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
+  // For example, i64 = BIT_CONVERT v4i16 on Alpha.  Typically the vector will
+  // end up being split all the way down to individual components.  Convert the
+  // split pieces into integers and reassemble.
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  Lo = BitConvertToInteger(Lo);
+  Hi = BitConvertToInteger(Hi);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+                     JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+  // We know that the extracted result type is legal.  For now, assume the index
+  // is a constant.
+  EVT SubVT = N->getValueType(0);
+  SDValue Idx = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+
+  uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+  if (IdxVal < LoElts) {
+    assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+           "Extracted subvector crosses vector split!");
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+  } else {
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+                       DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+  }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue Vec = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+  EVT VecVT = Vec.getValueType();
+
+  if (isa<ConstantSDNode>(Idx)) {
+    uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+    assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+    SDValue Lo, Hi;
+    GetSplitVector(Vec, Lo, Hi);
+
+    uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+    if (IdxVal < LoElts)
+      return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx);
+    return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
+                                  DAG.getConstant(IdxVal - LoElts,
+                                                  Idx.getValueType()));
+  }
+
+  // Store the vector to the stack.
+  EVT EltVT = VecVT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0);
+
+  // Load back the required element.
+  StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+                        SV, 0, EltVT);
+}
+
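+/// SplitVecOp_STORE - Split a store whose stored value must be split: emit
+/// one store per half, the second at the base pointer advanced by the size
+/// of the low memory type, and tie the chains together with a TokenFactor.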
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  assert(N->isUnindexed() && "Indexed store of vector?");
+  assert(OpNo == 1 && "Can only split the stored value");
+  DebugLoc dl = N->getDebugLoc();
+
+  bool isTruncating = N->isTruncatingStore();
+  SDValue Ch  = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  int SVOffset = N->getSrcValueOffset();
+  EVT MemoryVT = N->getMemoryVT();
+  unsigned Alignment = N->getOriginalAlignment();
+  bool isVol = N->isVolatile();
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(1), Lo, Hi);
+
+  EVT LoMemVT, HiMemVT;
+  GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+  if (isTruncating)
+    Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+                           LoMemVT, isVol, Alignment);
+  else
+    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+                      isVol, Alignment);
+
+  // Increment the pointer to the other half.
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  SVOffset += IncrementSize;
+
+  if (isTruncating)
+    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+                           HiMemVT, isVol, Alignment);
+  else
+    Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+                      isVol, Alignment);
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Result Vector Widening
+//===----------------------------------------------------------------------===//
+
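+/// WidenVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector widening.  At this point, other
+/// results and operands of the node may need legalization as well; we just
+/// know that (at least) this result must be widened.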
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+
+  // See if the target wants to custom widen this node.
+  if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+    return;
+
+  SDValue Res = SDValue();
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "WidenVectorResult #" << ResNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to widen the result of this operator!");
+
+  case ISD::BIT_CONVERT:       Res = WidenVecRes_BIT_CONVERT(N); break;
+  case ISD::BUILD_VECTOR:      Res = WidenVecRes_BUILD_VECTOR(N); break;
+  case ISD::CONCAT_VECTORS:    Res = WidenVecRes_CONCAT_VECTORS(N); break;
+  case ISD::CONVERT_RNDSAT:    Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+  case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FP_ROUND_INREG:    Res = WidenVecRes_InregOp(N); break;
+  case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
+  case ISD::SCALAR_TO_VECTOR:  Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+  case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+  case ISD::SELECT:            Res = WidenVecRes_SELECT(N); break;
+  case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
+  case ISD::SETCC:             Res = WidenVecRes_SETCC(N); break;
+  case ISD::UNDEF:             Res = WidenVecRes_UNDEF(N); break;
+  case ISD::VECTOR_SHUFFLE:
+    Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+    break;
+  case ISD::VSETCC:
+    Res = WidenVecRes_VSETCC(N);
+    break;
+
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::BSWAP:
+  case ISD::FADD:
+  case ISD::FCOPYSIGN:
+  case ISD::FDIV:
+  case ISD::FMUL:
+  case ISD::FPOW:
+  case ISD::FPOWI:
+  case ISD::FREM:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::MULHS:
+  case ISD::MULHU:
+  case ISD::OR:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::SUB:
+  case ISD::XOR:
+    Res = WidenVecRes_Binary(N);
+    break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    Res = WidenVecRes_Shift(N);
+    break;
+
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+    Res = WidenVecRes_Convert(N);
+    break;
+
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::FABS:
+  case ISD::FCOS:
+  case ISD::FNEG:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+    Res = WidenVecRes_Unary(N);
+    break;
+  }
+
+  // If Res is null, the sub-method took care of registering the result.
+  if (Res.getNode())
+    SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+  // Binary op widening.
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
+}
+
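+/// WidenVecRes_Convert - Widen a conversion whose input and result value
+/// types differ.  The input is widened to match the result when possible,
+/// either directly or by padding it with undef elements; when only a
+/// subvector of the input is needed, it is extracted; otherwise the
+/// conversion is unrolled into scalar operations and the result is rebuilt
+/// with a BUILD_VECTOR.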
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  EVT InVT = InOp.getValueType();
+  EVT InEltVT = InVT.getVectorElementType();
+  EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+  unsigned Opcode = N->getOpcode();
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+
+  if (getTypeAction(InVT) == WidenVector) {
+    InOp = GetWidenedVector(N->getOperand(0));
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts)
+      return DAG.getNode(Opcode, dl, WidenVT, InOp);
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      for (unsigned i = 1; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      return DAG.getNode(Opcode, dl, WidenVT,
+                         DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
+                                     &Ops[0], NumConcat));
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shortened input vector.
+      return DAG.getNode(Opcode, dl, WidenVT,
+                         DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
+                                     InOp, DAG.getIntPtrConstant(0)));
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = WidenVT.getVectorElementType();
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i=0; i < MinElts; ++i)
+    Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+                                     DAG.getIntPtrConstant(i)));
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  SDValue ShOp = N->getOperand(1);
+
+  EVT ShVT = ShOp.getValueType();
+  if (getTypeAction(ShVT) == WidenVector) {
+    ShOp = GetWidenedVector(ShOp);
+    ShVT = ShOp.getValueType();
+  }
+  EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+                                   ShVT.getVectorElementType(),
+                                   WidenVT.getVectorNumElements());
+  if (ShVT != ShWidenVT)
+    ShOp = ModifyToType(ShOp, ShWidenVT);
+
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+  // Unary op widening.
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               cast<VTSDNode>(N->getOperand(1))->getVT()
+                                 .getVectorElementType(),
+                               WidenVT.getVectorNumElements());
+  SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  EVT InVT = InOp.getValueType();
+  EVT VT = N->getValueType(0);
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  switch (getTypeAction(InVT)) {
+  default:
+    assert(false && "Unknown type action!");
+    break;
+  case Legal:
+    break;
+  case PromoteInteger:
+    // If the InOp is promoted to the same size, convert it.  Otherwise,
+    // fall out of the switch and widen the promoted input.
+    InOp = GetPromotedInteger(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+    break;
+  case SoftenFloat:
+  case ExpandInteger:
+  case ExpandFloat:
+  case ScalarizeVector:
+  case SplitVector:
+    break;
+  case WidenVector:
+    // If the InOp is widened to the same size, convert it.  Otherwise, fall
+    // out of the switch and widen the widened input.
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      // The input widens to the same size. Convert to the widen value.
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+    break;
+  }
+
+  unsigned WidenSize = WidenVT.getSizeInBits();
+  unsigned InSize = InVT.getSizeInBits();
+  if (WidenSize % InSize == 0) {
+    // Determine the new input vector type.  The new input vector type will
+    // use the same element type (if the input is a vector) or use the input
+    // type as the element type.  It is the same size as the type to widen to.
+    EVT NewInVT;
+    unsigned NewNumElts = WidenSize / InSize;
+    if (InVT.isVector()) {
+      EVT InEltVT = InVT.getVectorElementType();
+      NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+                                 WidenSize / InEltVT.getSizeInBits());
+    } else {
+      NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+    }
+
+    if (TLI.isTypeLegal(NewInVT)) {
+      // Because the result and the input are different vector types, widening
+      // the result could create a legal type but widening the input might make
+      // it an illegal type that might lead to repeatedly splitting the input
+      // and then widening it. To avoid this, we widen the input only if
+      // it results in a legal type.
+      SmallVector<SDValue, 16> Ops(NewNumElts);
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      Ops[0] = InOp;
+      for (unsigned i = 1; i < NewNumElts; ++i)
+        Ops[i] = UndefVal;
+
+      SDValue NewVec;
+      if (InVT.isVector())
+        NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      else
+        NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+    }
+  }
+
+  return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  // Build a vector with undefs for the new, widened elements.
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+  NewOps.reserve(WidenNumElts);
+  for (unsigned i = NumElts; i < WidenNumElts; ++i)
+    NewOps.push_back(DAG.getUNDEF(EltVT));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+  EVT InVT = N->getOperand(0).getValueType();
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  unsigned NumOperands = N->getNumOperands();
+
+  bool InputWidened = false; // Indicates we need to widen the input.
+  if (getTypeAction(InVT) != WidenVector) {
+    if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+      // Add undef vectors to widen to correct length.
+      unsigned NumConcat = WidenVT.getVectorNumElements() /
+                           InVT.getVectorNumElements();
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      for (unsigned i=0; i < NumOperands; ++i)
+        Ops[i] = N->getOperand(i);
+      for (unsigned i = NumOperands; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+    }
+  } else {
+    InputWidened = true;
+    if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+      // The inputs and the result are widened to the same type.
+      unsigned i;
+      for (i=1; i < NumOperands; ++i)
+        if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+          break;
+
+      if (i == NumOperands)
+        // Everything but the first operand is an UNDEF so just return the
+        // widened first operand.
+        return GetWidenedVector(N->getOperand(0));
+
+      if (NumOperands == 2) {
+        // Replace concat of two operands with a shuffle.
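+        // For example, with WidenNumElts == 4 the mask is <0,1,4,5>: the low
+        // half of each widened operand supplies one half of the result.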
+        SmallVector<int, 16> MaskOps(WidenNumElts);
+        for (unsigned i=0; i < WidenNumElts/2; ++i) {
+          MaskOps[i] = i;
+          MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+        }
+        return DAG.getVectorShuffle(WidenVT, dl,
+                                    GetWidenedVector(N->getOperand(0)),
+                                    GetWidenedVector(N->getOperand(1)),
+                                    &MaskOps[0]);
+      }
+    }
+  }
+
+  // Fall back to using extracts and a BUILD_VECTOR.
+  EVT EltVT = WidenVT.getVectorElementType();
+  unsigned NumInElts = InVT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  unsigned Idx = 0;
+  for (unsigned i=0; i < NumOperands; ++i) {
+    SDValue InOp = N->getOperand(i);
+    if (InputWidened)
+      InOp = GetWidenedVector(InOp);
+    for (unsigned j=0; j < NumInElts; ++j)
+      Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                               DAG.getIntPtrConstant(j));
+  }
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; Idx < WidenNumElts; ++Idx)
+    Ops[Idx] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  SDValue InOp  = N->getOperand(0);
+  SDValue RndOp = N->getOperand(3);
+  SDValue SatOp = N->getOperand(4);
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                         N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  EVT InVT = InOp.getValueType();
+  EVT InEltVT = InVT.getVectorElementType();
+  EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+  SDValue DTyOp = DAG.getValueType(WidenVT);
+  SDValue STyOp = DAG.getValueType(InWidenVT);
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+  if (getTypeAction(InVT) == WidenVector) {
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts)
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      for (unsigned i = 1; i != NumConcat; ++i) {
+        Ops[i] = UndefVal;
+      }
+      InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
+                         &Ops[0], NumConcat);
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shortened input vector.
+      InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+                         DAG.getIntPtrConstant(0));
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = WidenVT.getVectorElementType();
+  DTyOp = DAG.getValueType(EltVT);
+  STyOp = DAG.getValueType(InEltVT);
+
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i=0; i < MinElts; ++i) {
+    SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+                                 DAG.getIntPtrConstant(i));
+    Ops[i] = DAG.getConvertRndSat(EltVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+  EVT      VT = N->getValueType(0);
+  EVT      WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  SDValue  InOp = N->getOperand(0);
+  SDValue  Idx  = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+
+  if (getTypeAction(InOp.getValueType()) == WidenVector)
+    InOp = GetWidenedVector(InOp);
+
+  EVT InVT = InOp.getValueType();
+
+  ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
+  if (CIdx) {
+    unsigned IdxVal = CIdx->getZExtValue();
+    // Check if we can just return the input vector after widening.
+    if (IdxVal == 0 && InVT == WidenVT)
+      return InOp;
+
+    // Check if we can extract from the vector.
+    unsigned InNumElts = InVT.getVectorNumElements();
+    if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+  }
+
+  // We could try widening the input to the right length, but for now extract
+  // the original elements, fill the rest with undefs and build a vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = VT.getVectorElementType();
+  EVT IdxVT = Idx.getValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned i;
+  if (CIdx) {
+    unsigned IdxVal = CIdx->getZExtValue();
+    for (i=0; i < NumElts; ++i)
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                           DAG.getConstant(IdxVal+i, IdxVT));
+  } else {
+    Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
+    for (i=1; i < NumElts; ++i) {
+      SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+                                   DAG.getConstant(i, IdxVT));
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
+    }
+  }
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+                     InOp.getValueType(), InOp,
+                     N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+
+  SDValue Result;
+  SmallVector<SDValue, 16> LdChain;  // Chain for the series of loads
+  if (ExtType != ISD::NON_EXTLOAD)
+    Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+  else
+    Result = GenWidenVectorLoads(LdChain, LD);
+
+  // If we generate a single load, we can use that for the chain.  Otherwise,
+  // build a factor node to remember the multiple loads are independent and
+  // chain to that.
+  SDValue NewChain;
+  if (LdChain.size() == 1)
+    NewChain = LdChain[0];
+  else
+    NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+                           &LdChain[0], LdChain.size());
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), NewChain);
+
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+                     WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue Cond1 = N->getOperand(0);
+  EVT CondVT = Cond1.getValueType();
+  if (CondVT.isVector()) {
+    EVT CondEltVT = CondVT.getVectorElementType();
+    EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT,
+                                       WidenNumElts);
+    if (getTypeAction(CondVT) == WidenVector)
+      Cond1 = GetWidenedVector(Cond1);
+
+    if (Cond1.getValueType() != CondWidenVT)
+      Cond1 = ModifyToType(Cond1, CondWidenVT);
+  }
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+  assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+  SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     InOp1.getValueType(), N->getOperand(0),
+                     N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT,
+                     InOp1, InOp2, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                         N->getValueType(0));
+  return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Adjust mask based on new input vector length.
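+  // Mask entries that referenced the second input are rebased: old element
+  // NumElts+i now lives at WidenNumElts+i, since the widened first input
+  // occupies indices [0, WidenNumElts).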
+  SmallVector<int, 16> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = N->getMaskElt(i);
+    if (Idx < (int)NumElts)
+      NewMask.push_back(Idx);
+    else
+      NewMask.push_back(Idx - NumElts + WidenNumElts);
+  }
+  for (unsigned i = NumElts; i != WidenNumElts; ++i)
+    NewMask.push_back(-1);
+  return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = N->getOperand(0);
+  EVT InVT = InOp1.getValueType();
+  assert(InVT.isVector() && "Cannot widen a non-vector type");
+  EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+                                   InVT.getVectorElementType(), WidenNumElts);
+  InOp1 = GetWidenedVector(InOp1);
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately.  If not,
+  // we will have to unroll it at some point.
+  assert(InOp1.getValueType() == WidenInVT &&
+         InOp2.getValueType() == WidenInVT &&
+         "Input not widened to expected type!");
+  return DAG.getNode(ISD::VSETCC, N->getDebugLoc(),
+                     WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
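+
+/// WidenVectorOperand - This method is called when the specified operand of
+/// the specified node is found to need vector widening.  At this point, all
+/// of the result types of the node are known to be legal, but other operands
+/// may need legalization as well.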
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to widen this operator's operand!");
+
+  case ISD::BIT_CONVERT:        Res = WidenVecOp_BIT_CONVERT(N); break;
+  case ISD::CONCAT_VECTORS:     Res = WidenVecOp_CONCAT_VECTORS(N); break;
+  case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+  case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::STORE:              Res = WidenVecOp_STORE(N); break;
+
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+    Res = WidenVecOp_Convert(N);
+    break;
+  }
+
+  // If Res is null, the sub-method took care of registering the result.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+  // Since the result is legal and the input is illegal, it is unlikely
+  // that we can fix the input to a legal type, so unroll the convert
+  // into some scalar code and create a nasty build vector.
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+  unsigned NumElts = VT.getVectorNumElements();
+  SDValue InOp = N->getOperand(0);
+  if (getTypeAction(InOp.getValueType()) == WidenVector)
+    InOp = GetWidenedVector(InOp);
+  EVT InVT = InOp.getValueType();
+  EVT InEltVT = InVT.getVectorElementType();
+
+  unsigned Opcode = N->getOpcode();
+  SmallVector<SDValue, 16> Ops(NumElts);
+  for (unsigned i=0; i < NumElts; ++i)
+    Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+                                     DAG.getIntPtrConstant(i)));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  EVT InWidenVT = InOp.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Check if we can convert between two legal vector types and extract.
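+  // For example, an i32 result whose input was widened to v8i16: bit_convert
+  // the v8i16 to v4i32 (when that type is legal) and extract element zero.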
+  unsigned InWidenSize = InWidenVT.getSizeInBits();
+  unsigned Size = VT.getSizeInBits();
+  if (InWidenSize % Size == 0 && !VT.isVector()) {
+    unsigned NewNumElts = InWidenSize / Size;
+    EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+    if (TLI.isTypeLegal(NewVT)) {
+      SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+                         DAG.getIntPtrConstant(0));
+    }
+  }
+
+  return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+  // If the input vector is not legal, it is likely that we will not find a
+  // legal vector of the same size.  Replace the concatenation with a
+  // nasty build vector.
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(NumElts);
+
+  EVT InVT = N->getOperand(0).getValueType();
+  unsigned NumInElts = InVT.getVectorNumElements();
+
+  unsigned Idx = 0;
+  unsigned NumOperands = N->getNumOperands();
+  for (unsigned i=0; i < NumOperands; ++i) {
+    SDValue InOp = N->getOperand(i);
+    if (getTypeAction(InOp.getValueType()) == WidenVector)
+      InOp = GetWidenedVector(InOp);
+    for (unsigned j=0; j < NumInElts; ++j)
+      Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                               DAG.getIntPtrConstant(j));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(),
+                     N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+                     N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  // We have to widen the value, but we only want to store the original
+  // vector type.
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+
+  SmallVector<SDValue, 16> StChain;
+  if (ST->isTruncatingStore())
+    GenWidenVectorTruncStores(StChain, ST);
+  else
+    GenWidenVectorStores(StChain, ST);
+
+  if (StChain.size() == 1)
+    return StChain[0];
+  else
+    return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
+                       MVT::Other, &StChain[0], StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widened vector for
+// load/store.
+//  TLI:       Target lowering used to determine legal types.
+//  Width:     Width still needed to load/store.
+//  WidenVT:   The widened vector type to load to/store from.
+//  Align:     If 0, don't allow use of a wider type.
+//  WidenEx:   If Align is not 0, the additional amount we are allowed to
+//             load/store beyond Width.
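+//
+// For example, with 96 bits left to load for a widened v4i32 on a target
+// where i64 is legal but no 96-bit or v2i32 type is, this returns i64; the
+// caller then loads 64 bits and calls back with Width = 32 for the rest.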
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+                       unsigned Width, EVT WidenVT,
+                       unsigned Align = 0, unsigned WidenEx = 0) {
+  EVT WidenEltVT = WidenVT.getVectorElementType();
+  unsigned WidenWidth = WidenVT.getSizeInBits();
+  unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+  unsigned AlignInBits = Align*8;
+
+  // If we have one element to load/store, return it.
+  EVT RetVT = WidenEltVT;
+  if (Width == WidenEltWidth)
+    return RetVT;
+
+  // See if there is a larger legal integer type to load/store than the
+  // element type.
+  unsigned VT;
+  for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+       VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+    EVT MemVT((MVT::SimpleValueType) VT);
+    unsigned MemVTWidth = MemVT.getSizeInBits();
+    if (MemVTWidth <= WidenEltWidth)
+      break;
+    if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+        (MemVTWidth <= Width ||
+         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+      RetVT = MemVT;
+      break;
+    }
+  }
+
+  // See if there is a larger vector type to load/store that has the same
+  // vector element type and whose width evenly divides the width of WidenVT.
+  for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+       VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+    EVT MemVT = (MVT::SimpleValueType) VT;
+    unsigned MemVTWidth = MemVT.getSizeInBits();
+    if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+        (WidenWidth % MemVTWidth) == 0 &&
+        (MemVTWidth <= Width ||
+         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+      if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+        return MemVT;
+    }
+  }
+
+  return RetVT;
+}
+
+// Builds a vector from scalar loads.
+//  VecTy: Resulting vector type.
+//  LdOps: Load operators to build the vector from.
+//  [Start,End): the range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+                                     SmallVector<SDValue, 16>& LdOps,
+                                     unsigned Start, unsigned End) {
+  DebugLoc dl = LdOps[Start].getDebugLoc();
+  EVT LdTy = LdOps[Start].getValueType();
+  unsigned Width = VecTy.getSizeInBits();
+  unsigned NumElts = Width / LdTy.getSizeInBits();
+  EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+  unsigned Idx = 1;
+  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,
+                              LdOps[Start]);
+
+  for (unsigned i = Start + 1; i != End; ++i) {
+    EVT NewLdTy = LdOps[i].getValueType();
+    if (NewLdTy != LdTy) {
+      NumElts = Width / NewLdTy.getSizeInBits();
+      NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+      // Readjust the insertion index to account for the new load type.
+      Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+      LdTy = NewLdTy;
+    }
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+                        DAG.getIntPtrConstant(Idx++));
+  }
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                                              LoadSDNode *LD) {
+  // The strategy assumes that we can efficiently load power-of-two widths.
+  // The routine chops the vector into the largest vector loads with the same
+  // element type, or scalar loads, and then recombines the pieces into the
+  // widened vector type.
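+  // For example, a v7i32 load widened to v8i32 may be emitted as a v4i32
+  // load followed by an i64 and an i32 load; the scalar pieces are rebuilt
+  // into a small vector and concatenated with the v4i32 piece.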
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                         LD->getValueType(0));
+  unsigned WidenWidth = WidenVT.getSizeInBits();
+  EVT LdVT    = LD->getMemoryVT();
+  DebugLoc dl = LD->getDebugLoc();
+  assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+  // Load information
+  SDValue   Chain = LD->getChain();
+  SDValue   BasePtr = LD->getBasePtr();
+  int       SVOffset = LD->getSrcValueOffset();
+  unsigned  Align    = LD->getAlignment();
+  bool      isVolatile = LD->isVolatile();
+  const Value *SV = LD->getSrcValue();
+
+  int LdWidth = LdVT.getSizeInBits();
+  int WidthDiff = WidenWidth - LdWidth;        // Amount over the load width
+  unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads
+
+  // Find the memory type to use for the first load.
+  EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+  int NewVTWidth = NewVT.getSizeInBits();
+  SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+                             isVolatile, Align);
+  LdChain.push_back(LdOp.getValue(1));
+
+  // Check if we can load the element with one instruction
+  if (LdWidth <= NewVTWidth) {
+    if (NewVT.isVector()) {
+      if (NewVT != WidenVT) {
+        assert(WidenWidth % NewVTWidth == 0);
+        unsigned NumConcat = WidenWidth / NewVTWidth;
+        SmallVector<SDValue, 16> ConcatOps(NumConcat);
+        SDValue UndefVal = DAG.getUNDEF(NewVT);
+        ConcatOps[0] = LdOp;
+        for (unsigned i = 1; i != NumConcat; ++i)
+          ConcatOps[i] = UndefVal;
+        return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+                           NumConcat);
+      } else
+        return LdOp;
+    } else {
+      unsigned NumElts = WidenWidth / LdWidth;
+      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+      SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+    }
+  }
+
+  // Load the vector with multiple loads, working from the largest vector
+  // type down to scalars.
+  SmallVector<SDValue, 16> LdOps;
+  LdOps.push_back(LdOp);
+
+  LdWidth -= NewVTWidth;
+  unsigned Offset = 0;
+
+  while (LdWidth > 0) {
+    unsigned Increment = NewVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+
+    if (LdWidth < NewVTWidth) {
+      // The current type is too large; find a smaller one that fits.
+      NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+      NewVTWidth = NewVT.getSizeInBits();
+    }
+
+    SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
+                               SVOffset+Offset, isVolatile,
+                               MinAlign(Align, Increment));
+    LdChain.push_back(LdOp.getValue(1));
+    LdOps.push_back(LdOp);
+
+    LdWidth -= NewVTWidth;
+  }
+
+  // Build the vector from the load operations.
+  unsigned End = LdOps.size();
+  if (LdOps[0].getValueType().isVector()) {
+    // If the loads contain vectors, build the result using CONCAT_VECTORS.
+    // All of the vector loads are power-of-2 sized, and the scalar loads can
+    // be combined to make a power-of-2 vector.
+    SmallVector<SDValue, 16> ConcatOps(End);
+    int i = End - 1;
+    int Idx = End;
+    EVT LdTy = LdOps[i].getValueType();
+    // First combine the scalar loads to a vector
+    if (!LdTy.isVector())  {
+      for (--i; i >= 0; --i) {
+        LdTy = LdOps[i].getValueType();
+        if (LdTy.isVector())
+          break;
+      }
+      ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+    }
+    ConcatOps[--Idx] = LdOps[i];
+    for (--i; i >= 0; --i) {
+      EVT NewLdTy = LdOps[i].getValueType();
+      if (NewLdTy != LdTy) {
+        // Create a larger vector
+        ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+                                       &ConcatOps[Idx], End - Idx);
+        Idx = End - 1;
+        LdTy = NewLdTy;
+      }
+      ConcatOps[--Idx] = LdOps[i];
+    }
+
+    if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
+      // We need to fill the rest with undefs to build the vector
+      unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+      SmallVector<SDValue, 16> WidenOps(NumOps);
+      SDValue UndefVal = DAG.getUNDEF(LdTy);
+      unsigned i = 0;
+      for (; i != End-Idx; ++i)
+        WidenOps[i] = ConcatOps[Idx+i];
+      for (; i != NumOps; ++i)
+        WidenOps[i] = UndefVal;
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],
+                         NumOps);
+    } else
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+                         &ConcatOps[Idx], End - Idx);
+  } else // All the loads are scalar loads.
+    return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+                                         LoadSDNode * LD,
+                                         ISD::LoadExtType ExtType) {
+  // For extension loads, it may not be more efficient to chop up the vector
+  // and then extend it.  Instead, we unroll the load and build a new vector.
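+  //
+  // For instance (illustrative): an extending load from v2i8 widened to
+  // v4i16 becomes two i8->i16 element extloads plus two undef elements,
+  // combined with a single BUILD_VECTOR below.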
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  EVT LdVT    = LD->getMemoryVT();
+  DebugLoc dl = LD->getDebugLoc();
+  assert(LdVT.isVector() && WidenVT.isVector());
+
+  // Load information
+  SDValue   Chain = LD->getChain();
+  SDValue   BasePtr = LD->getBasePtr();
+  int       SVOffset = LD->getSrcValueOffset();
+  unsigned  Align    = LD->getAlignment();
+  bool      isVolatile = LD->isVolatile();
+  const Value *SV = LD->getSrcValue();
+
+  EVT EltVT = WidenVT.getVectorElementType();
+  EVT LdEltVT = LdVT.getVectorElementType();
+  unsigned NumElts = LdVT.getVectorNumElements();
+
+  // Load each element and widen
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  unsigned Increment = LdEltVT.getSizeInBits() / 8;
+  Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
+                          LdEltVT, isVolatile, Align);
+  LdChain.push_back(Ops[0].getValue(1));
+  unsigned i = 0, Offset = Increment;
+  for (i=1; i < NumElts; ++i, Offset += Increment) {
+    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                     BasePtr, DAG.getIntPtrConstant(Offset));
+    Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
+                            SVOffset + Offset, LdEltVT, isVolatile, Align);
+    LdChain.push_back(Ops[i].getValue(1));
+  }
+
+  // Fill the rest with undefs
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i != WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
+
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+                                            StoreSDNode *ST) {
+  // The strategy assumes that we can efficiently store powers of two widths.
+  // The routine chops the vector into the largest vector stores with the same
+  // element type, or scalar stores.
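+  //
+  // For example (illustrative): storing v3i32 from a widened v4i32 might be
+  // emitted as a v2i32 subvector store followed by a scalar i32 store of
+  // element 2, depending on what FindMemType returns for the target.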
+  SDValue  Chain = ST->getChain();
+  SDValue  BasePtr = ST->getBasePtr();
+  const    Value *SV = ST->getSrcValue();
+  int      SVOffset = ST->getSrcValueOffset();
+  unsigned Align = ST->getAlignment();
+  bool     isVolatile = ST->isVolatile();
+  SDValue  ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = ST->getDebugLoc();
+
+  EVT StVT = ST->getMemoryVT();
+  unsigned StWidth = StVT.getSizeInBits();
+  EVT ValVT = ValOp.getValueType();
+  unsigned ValWidth = ValVT.getSizeInBits();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  unsigned ValEltWidth = ValEltVT.getSizeInBits();
+  assert(StVT.getVectorElementType() == ValEltVT);
+
+  int Idx = 0;          // current index to store
+  unsigned Offset = 0;  // offset from base to store
+  while (StWidth != 0) {
+    // Find the largest vector type we can store with
+    EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+    unsigned NewVTWidth = NewVT.getSizeInBits();
+    unsigned Increment = NewVTWidth / 8;
+    if (NewVT.isVector()) {
+      unsigned NumVTElts = NewVT.getVectorNumElements();
+      do {
+        SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+                                   DAG.getIntPtrConstant(Idx));
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+                                       SVOffset + Offset, isVolatile,
+                                       MinAlign(Align, Offset)));
+        StWidth -= NewVTWidth;
+        Offset += Increment;
+        Idx += NumVTElts;
+        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                              DAG.getIntPtrConstant(Increment));
+      } while (StWidth != 0 && StWidth >= NewVTWidth);
+    } else {
+      // Cast the vector to the scalar type we can store
+      unsigned NumElts = ValWidth / NewVTWidth;
+      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+      SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+      // Readjust index position based on new vector type
+      Idx = Idx * ValEltWidth / NewVTWidth;
+      do {
+        SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+                      DAG.getIntPtrConstant(Idx++));
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+                                   SVOffset + Offset, isVolatile,
+                                   MinAlign(Align, Offset)));
+        StWidth -= NewVTWidth;
+        Offset += Increment;
+        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                              DAG.getIntPtrConstant(Increment));
+      } while (StWidth != 0 && StWidth >= NewVTWidth);
+      // Restore index back to be relative to the original widen element type
+      Idx = Idx * NewVTWidth / ValEltWidth;
+    }
+  }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+                                            StoreSDNode *ST) {
+  // For truncating stores, it may not be more efficient to truncate the
+  // vector and then store it.  Instead, we extract each element and store it.
+  SDValue  Chain = ST->getChain();
+  SDValue  BasePtr = ST->getBasePtr();
+  const    Value *SV = ST->getSrcValue();
+  int      SVOffset = ST->getSrcValueOffset();
+  unsigned Align = ST->getAlignment();
+  bool     isVolatile = ST->isVolatile();
+  SDValue  ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = ST->getDebugLoc();
+  
+  EVT StVT = ST->getMemoryVT();
+  EVT ValVT = ValOp.getValueType();
+
+  // It must be the case that the widened vector type is bigger than the
+  // type we need to store.
+  assert(StVT.isVector() && ValOp.getValueType().isVector());
+  assert(StVT.bitsLT(ValOp.getValueType()));
+
+  // We cannot play the trick of chopping into legal vector types and
+  // bitcasting to the right type, as we do for regular stores.  Instead,
+  // we unroll the store.
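+  //
+  // E.g. (illustrative): a truncating store to v4i8 from a widened v4i32
+  // extracts each i32 element in turn and emits four i8 truncating stores.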
+  EVT StEltVT  = StVT.getVectorElementType();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  unsigned Increment = ValEltVT.getSizeInBits() / 8;
+  unsigned NumElts = StVT.getVectorNumElements();
+  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                            DAG.getIntPtrConstant(0));
+  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+                                      SVOffset, StEltVT,
+                                      isVolatile, Align));
+  unsigned Offset = Increment;
+  for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                     BasePtr, DAG.getIntPtrConstant(Offset));
+    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                              DAG.getIntPtrConstant(i));
+    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+                                        SVOffset + Offset, StEltVT,
+                                        isVolatile, MinAlign(Align, Offset)));
+  }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT.  The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+  // Note that InOp might have been widened, so it might already have
+  // the right width or it might need to be narrowed.
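+  //
+  // For instance (illustrative): widening v2f32 to v4f32 concatenates InOp
+  // with one undef v2f32; when no cheap path below applies, we fall back to
+  // extracting each element and rebuilding the result with BUILD_VECTOR.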
+  EVT InVT = InOp.getValueType();
+  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+         "input and widen element type must match");
+  DebugLoc dl = InOp.getDebugLoc();
+
+  // Check if InOp already has the right width.
+  if (InVT == NVT)
+    return InOp;
+
+  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned WidenNumElts = NVT.getVectorNumElements();
+  if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+    unsigned NumConcat = WidenNumElts / InNumElts;
+    SmallVector<SDValue, 16> Ops(NumConcat);
+    SDValue UndefVal = DAG.getUNDEF(InVT);
+    Ops[0] = InOp;
+    for (unsigned i = 1; i != NumConcat; ++i)
+      Ops[i] = UndefVal;
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+  }
+
+  if (WidenNumElts < InNumElts && InNumElts % WidenNumElts == 0)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+                       DAG.getIntPtrConstant(0));
+
+  // Fall back to extract and build.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = NVT.getVectorElementType();
+  unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+  unsigned Idx;
+  for (Idx = 0; Idx < MinNumElts; ++Idx)
+    Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                           DAG.getIntPtrConstant(Idx));
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for ( ; Idx < WidenNumElts; ++Idx)
+    Ops[Idx] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 0000000..ea716fd
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,13 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
new file mode 100644
index 0000000..f88b26d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDNodeOrdering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEORDERING_H
+#define LLVM_CODEGEN_SDNODEORDERING_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class SDNode;
+
+/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each
+/// SDNode; the value roughly corresponds to the order of the original LLVM
+/// instructions. This is used when scheduling is turned off, because we forgo
+/// the normal scheduling algorithms and output the instructions according to
+/// this ordering.
+class SDNodeOrdering {
+  DenseMap<const SDNode*, unsigned> OrderMap;
+
+  void operator=(const SDNodeOrdering&);   // Do not implement.
+  SDNodeOrdering(const SDNodeOrdering&);   // Do not implement.
+public:
+  SDNodeOrdering() {}
+
+  void add(const SDNode *Node, unsigned O) {
+    OrderMap[Node] = O;
+  }
+  void remove(const SDNode *Node) {
+    DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
+    if (Itr != OrderMap.end())
+      OrderMap.erase(Itr);
+  }
+  void clear() {
+    OrderMap.clear();
+  }
+  unsigned getOrder(const SDNode *Node) {
+    return OrderMap[Node];
+  }
+};
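+
+// Illustrative usage (hypothetical values): a client tags nodes as they are
+// created and queries the order later:
+//   SDNodeOrdering Ordering;
+//   Ordering.add(Node, 42);                   // Node maps to position 42.
+//   unsigned Order = Ordering.getOrder(Node); // 42; unseen nodes yield 0.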
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 0000000..ad8630a
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,604 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds,    "Number of nodes unfolded");
+STATISTIC(NumDups,       "Number of duplicated nodes");
+STATISTIC(NumPRCopies,   "Number of physical copies");
+
+static RegisterScheduler
+  fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+                   createFastDAGScheduler);
+
+namespace {
+  /// FastPriorityQueue - A degenerate priority queue that considers
+  /// all nodes to have the same priority.
+  ///
+  struct FastPriorityQueue {
+    SmallVector<SUnit *, 16> Queue;
+
+    bool empty() const { return Queue.empty(); }
+    
+    void push(SUnit *U) {
+      Queue.push_back(U);
+    }
+
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.back();
+      Queue.pop_back();
+      return V;
+    }
+  };
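+  // Note that the queue behaves as a stack, so "priority" is simply LIFO
+  // order: e.g. after push(A) then push(B), pop() returns B first.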
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  FastPriorityQueue AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other nodes
+  /// that modify the registers can be scheduled.
+  unsigned NumLiveRegs;
+  std::vector<SUnit*> LiveRegDefs;
+  std::vector<unsigned> LiveRegCycles;
+
+public:
+  ScheduleDAGFast(MachineFunction &mf)
+    : ScheduleDAGSDNodes(mf) {}
+
+  void Schedule();
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  void AddPred(SUnit *SU, const SDep &D) {
+    SU->addPred(D);
+  }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  void RemovePred(SUnit *SU, const SDep &D) {
+    SU->removePred(D);
+  }
+
+private:
+  void ReleasePred(SUnit *SU, SDep *PredEdge);
+  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+  void ScheduleNodeBottomUp(SUnit*, unsigned);
+  SUnit *CopyAndMoveSuccessors(SUnit*);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
+  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+  void ListScheduleBottomUp();
+
+  /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
+  bool ForceUnitLatencies() const { return true; }
+};
+}  // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+  NumLiveRegs = 0;
+  LiveRegDefs.resize(TRI->getNumRegs(), NULL);  
+  LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+  // Build the scheduling graph.
+  BuildSchedGraph(NULL);
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+
+  // Execute the actual scheduling loop.
+  ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+//  Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    PredSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --PredSU->NumSuccsLeft;
+
+  // If all the node's successors are scheduled, this node is ready
+  // to be scheduled. Ignore the special EntrySU node.
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+    PredSU->isAvailable = true;
+    AvailableQueue.push(PredSU);
+  }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+  // Bottom up: release predecessors
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    ReleasePred(SU, &*I);
+    if (I->isAssignedRegDep()) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can 
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      if (!LiveRegDefs[I->getReg()]) {
+        ++NumLiveRegs;
+        LiveRegDefs[I->getReg()] = I->getSUnit();
+        LiveRegCycles[I->getReg()] = CurCycle;
+      }
+    }
+  }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(SU->dump(this));
+
+  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+  SU->setHeightToAtLeast(CurCycle);
+  Sequence.push_back(SU);
+
+  ReleasePredecessors(SU, CurCycle);
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        assert(LiveRegDefs[I->getReg()] == SU &&
+               "Physical register dependency violated?");
+        --NumLiveRegs;
+        LiveRegDefs[I->getReg()] = NULL;
+        LiveRegCycles[I->getReg()] = 0;
+      }
+    }
+  }
+
+  SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
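+/// This is used to break a live physical register dependency: duplicating the
+/// defining node lets a conflicting use read its own copy of the value.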
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+  if (SU->getNode()->getFlaggedNode())
+    return NULL;
+
+  SDNode *N = SU->getNode();
+  if (!N)
+    return NULL;
+
+  SUnit *NewSU;
+  bool TryUnfold = false;
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+    EVT VT = N->getValueType(i);
+    if (VT == MVT::Flag)
+      return NULL;
+    else if (VT == MVT::Other)
+      TryUnfold = true;
+  }
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDValue &Op = N->getOperand(i);
+    EVT VT = Op.getNode()->getValueType(Op.getResNo());
+    if (VT == MVT::Flag)
+      return NULL;
+  }
+
+  if (TryUnfold) {
+    SmallVector<SDNode*, 2> NewNodes;
+    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+      return NULL;
+
+    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+    N = NewNodes[1];
+    SDNode *LoadNode = NewNodes[0];
+    unsigned NumVals = N->getNumValues();
+    unsigned OldNumVals = SU->getNode()->getNumValues();
+    for (unsigned i = 0; i != NumVals; ++i)
+      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+                                   SDValue(LoadNode, 1));
+
+    SUnit *NewSU = NewSUnit(N);
+    assert(N->getNodeId() == -1 && "Node already inserted!");
+    N->setNodeId(NewSU->NodeNum);
+      
+    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+        NewSU->isTwoAddress = true;
+        break;
+      }
+    }
+    if (TID.isCommutable())
+      NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
+    bool isNewLoad = true;
+    SUnit *LoadSU;
+    if (LoadNode->getNodeId() != -1) {
+      LoadSU = &SUnits[LoadNode->getNodeId()];
+      isNewLoad = false;
+    } else {
+      LoadSU = NewSUnit(LoadNode);
+      LoadNode->setNodeId(LoadSU->NodeNum);
+    }
+
+    SDep ChainPred;
+    SmallVector<SDep, 4> ChainSuccs;
+    SmallVector<SDep, 4> LoadPreds;
+    SmallVector<SDep, 4> NodePreds;
+    SmallVector<SDep, 4> NodeSuccs;
+    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (I->isCtrl())
+        ChainPred = *I;
+      else if (I->getSUnit()->getNode() &&
+               I->getSUnit()->getNode()->isOperandOf(LoadNode))
+        LoadPreds.push_back(*I);
+      else
+        NodePreds.push_back(*I);
+    }
+    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+         I != E; ++I) {
+      if (I->isCtrl())
+        ChainSuccs.push_back(*I);
+      else
+        NodeSuccs.push_back(*I);
+    }
+
+    if (ChainPred.getSUnit()) {
+      RemovePred(SU, ChainPred);
+      if (isNewLoad)
+        AddPred(LoadSU, ChainPred);
+    }
+    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+      const SDep &Pred = LoadPreds[i];
+      RemovePred(SU, Pred);
+      if (isNewLoad) {
+        AddPred(LoadSU, Pred);
+      }
+    }
+    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+      const SDep &Pred = NodePreds[i];
+      RemovePred(SU, Pred);
+      AddPred(NewSU, Pred);
+    }
+    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+      SDep D = NodeSuccs[i];
+      SUnit *SuccDep = D.getSUnit();
+      D.setSUnit(SU);
+      RemovePred(SuccDep, D);
+      D.setSUnit(NewSU);
+      AddPred(SuccDep, D);
+    }
+    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+      SDep D = ChainSuccs[i];
+      SUnit *SuccDep = D.getSUnit();
+      D.setSUnit(SU);
+      RemovePred(SuccDep, D);
+      if (isNewLoad) {
+        D.setSUnit(LoadSU);
+        AddPred(SuccDep, D);
+      }
+    } 
+    if (isNewLoad) {
+      AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
+    }
+
+    ++NumUnfolds;
+
+    if (NewSU->NumSuccsLeft == 0) {
+      NewSU->isAvailable = true;
+      return NewSU;
+    }
+    SU = NewSU;
+  }
+
+  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+  NewSU = Clone(SU);
+
+  // New SUnit has the exact same predecessors.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I)
+    if (!I->isArtificial())
+      AddPred(NewSU, *I);
+
+  // Only copy scheduled successors. Cut them from the old node's successor
+  // list and move them over.
+  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isArtificial())
+      continue;
+    SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->isScheduled) {
+      SDep D = *I;
+      D.setSUnit(NewSU);
+      AddPred(SuccSU, D);
+      D.setSUnit(SU);
+      DelDeps.push_back(std::make_pair(SuccSU, D));
+    }
+  }
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+    RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+  ++NumDups;
+  return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+                                              const TargetRegisterClass *DestRC,
+                                              const TargetRegisterClass *SrcRC,
+                                               SmallVector<SUnit*, 2> &Copies) {
+  SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+  CopyFromSU->CopySrcRC = SrcRC;
+  CopyFromSU->CopyDstRC = DestRC;
+
+  SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+  CopyToSU->CopySrcRC = DestRC;
+  CopyToSU->CopyDstRC = SrcRC;
+
+  // Only copy scheduled successors. Cut them from the old node's successor
+  // list and move them over.
+  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isArtificial())
+      continue;
+    SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->isScheduled) {
+      SDep D = *I;
+      D.setSUnit(CopyToSU);
+      AddPred(SuccSU, D);
+      DelDeps.push_back(std::make_pair(SuccSU, *I));
+    }
+  }
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+    RemovePred(DelDeps[i].first, DelDeps[i].second);
+  }
+
+  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+  Copies.push_back(CopyFromSU);
+  Copies.push_back(CopyToSU);
+
+  ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
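+/// For example (hypothetical registers): with one explicit def and
+/// ImplicitDefs = { EFLAGS, EAX }, asking for EAX yields N->getValueType(2),
+/// since value 0 is the explicit def and value 1 corresponds to EFLAGS.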
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+                                 const TargetInstrInfo *TII) {
+  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+  unsigned NumRes = TID.getNumDefs();
+  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+    if (Reg == *ImpDef)
+      break;
+    ++NumRes;
+  }
+  return N->getValueType(NumRes);
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
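+/// For example (illustrative), if one of SU's implicit defs would clobber a
+/// register in LiveRegDefs still needed by another node, that register is
+/// appended to LRegs and SU's scheduling is delayed.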
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+                                               SmallVector<unsigned, 4> &LRegs){
+  if (NumLiveRegs == 0)
+    return false;
+
+  SmallSet<unsigned, 4> RegAdded;
+  // If this node would clobber any "live" register, then it's not ready.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      unsigned Reg = I->getReg();
+      if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
+        if (RegAdded.insert(Reg))
+          LRegs.push_back(Reg);
+      }
+      for (const unsigned *Alias = TRI->getAliasSet(Reg);
+           *Alias; ++Alias)
+        if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
+          if (RegAdded.insert(*Alias))
+            LRegs.push_back(*Alias);
+        }
+    }
+  }
+
+  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+    if (!Node->isMachineOpcode())
+      continue;
+    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+    if (!TID.ImplicitDefs)
+      continue;
+    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+      if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
+        if (RegAdded.insert(*Reg))
+          LRegs.push_back(*Reg);
+      }
+      for (const unsigned *Alias = TRI->getAliasSet(*Reg);
+           *Alias; ++Alias)
+        if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+          if (RegAdded.insert(*Alias))
+            LRegs.push_back(*Alias);
+        }
+    }
+  }
+  return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+  unsigned CurCycle = 0;
+
+  // Release any predecessors of the special Exit node.
+  ReleasePredecessors(&ExitSU, CurCycle);
+
+  // Add root to Available queue.
+  if (!SUnits.empty()) {
+    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+    RootSU->isAvailable = true;
+    AvailableQueue.push(RootSU);
+  }
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back.  Schedule the node.
+  SmallVector<SUnit*, 4> NotReady;
+  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue.empty()) {
+    bool Delayed = false;
+    LRegsMap.clear();
+    SUnit *CurSU = AvailableQueue.pop();
+    while (CurSU) {
+      SmallVector<unsigned, 4> LRegs;
+      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+        break;
+      Delayed = true;
+      LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
+      NotReady.push_back(CurSU);
+      CurSU = AvailableQueue.pop();
+    }
+
+    // All candidates are delayed due to live physical reg dependencies.
+    // Try code duplication or inserting cross class copies
+    // to resolve it.
+    if (Delayed && !CurSU) {
+      if (!CurSU) {
+        // Try duplicating the nodes that produce these
+        // "expensive to copy" values to break the dependency. If even
+        // that doesn't work, insert cross-class copies.
+        SUnit *TrySU = NotReady[0];
+        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+        assert(LRegs.size() == 1 && "Can't handle this yet!");
+        unsigned Reg = LRegs[0];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getPhysicalRegisterRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is null, then it must be possible
+        // to copy the value directly. Do not try to duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
+        if (!NewDef) {
+          // Issue copies, these can be expensive cross register class copies.
+          SmallVector<SUnit*, 2> Copies;
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+                       << " to SU #" << Copies.front()->NodeNum << "\n");
+          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+                              /*Reg=*/0, /*isNormalMemory=*/false,
+                              /*isMustAlias=*/false, /*isArtificial=*/true));
+          NewDef = Copies.back();
+        }
+
+        DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+                     << " to SU #" << TrySU->NodeNum << "\n");
+        LiveRegDefs[Reg] = NewDef;
+        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+                             /*Reg=*/0, /*isNormalMemory=*/false,
+                             /*isMustAlias=*/false, /*isArtificial=*/true));
+        TrySU->isAvailable = false;
+        CurSU = NewDef;
+      }
+
+      if (!CurSU) {
+        llvm_unreachable("Unable to resolve live physical register dependencies!");
+      }
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      // May no longer be available due to backtracking.
+      if (NotReady[i]->isAvailable)
+        AvailableQueue.push(NotReady[i]);
+    }
+    NotReady.clear();
+
+    if (CurSU)
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+    ++CurCycle;
+  }
+
+  // Reverse the order since it is bottom up.
+  std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+  VerifySchedule(/*isBottomUp=*/true);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  return new ScheduleDAGFast(*IS->MF);
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 0000000..b92a672
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,269 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+  tdListDAGScheduler("list-td", "Top-down list scheduler",
+                     createTDListDAGScheduler);
+   
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation.  This supports
+/// top-down scheduling.
+///
+class ScheduleDAGList : public ScheduleDAGSDNodes {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  SchedulingPriorityQueue *AvailableQueue;
+  
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation).  Once the operands become available, the instruction is
+  /// added to the AvailableQueue.
+  std::vector<SUnit*> PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  ScheduleHazardRecognizer *HazardRec;
+
+public:
+  ScheduleDAGList(MachineFunction &mf,
+                  SchedulingPriorityQueue *availqueue,
+                  ScheduleHazardRecognizer *HR)
+    : ScheduleDAGSDNodes(mf),
+      AvailableQueue(availqueue), HazardRec(HR) {}
+
+  ~ScheduleDAGList() {
+    delete HazardRec;
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+private:
+  void ReleaseSucc(SUnit *SU, const SDep &D);
+  void ReleaseSuccessors(SUnit *SU);
+  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void ListScheduleTopDown();
+};
+}  // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
+  
+  // Build the scheduling graph.
+  BuildSchedGraph(NULL);
+
+  AvailableQueue->initNodes(SUnits);
+  
+  ListScheduleTopDown();
+  
+  AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
+  SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    SuccSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --SuccSU->NumPredsLeft;
+
+  SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+  
+  // If all the node's predecessors are scheduled, this node is ready
+  // to be scheduled. Ignore the special ExitSU node.
+  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+    PendingQueue.push_back(SuccSU);
+}
+
+void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
+  // Top down: release successors.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    assert(!I->isAssignedRegDep() &&
+           "The list-td scheduler doesn't yet support physreg dependencies!");
+
+    ReleaseSucc(SU, *I);
+  }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(SU->dump(this));
+  
+  Sequence.push_back(SU);
+  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
+
+  ReleaseSuccessors(SU);
+  SU->isScheduled = true;
+  AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+
+  // Release any successors of the special Entry node.
+  ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.empty()) {
+      AvailableQueue->push(&SUnits[i]);
+      SUnits[i].isAvailable = true;
+    }
+  }
+  
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back.  Schedule the node.
+  std::vector<SUnit*> NotReady;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+    // Check to see if any of the pending instructions are ready to issue.  If
+    // so, add them to the available queue.
+    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+      if (PendingQueue[i]->getDepth() == CurCycle) {
+        AvailableQueue->push(PendingQueue[i]);
+        PendingQueue[i]->isAvailable = true;
+        PendingQueue[i] = PendingQueue.back();
+        PendingQueue.pop_back();
+        --i; --e;
+      } else {
+        assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+      }
+    }
+    
+    // If there are no instructions available, don't try to issue anything, and
+    // don't advance the hazard recognizer.
+    if (AvailableQueue->empty()) {
+      ++CurCycle;
+      continue;
+    }
+
+    SUnit *FoundSUnit = 0;
+    
+    bool HasNoopHazards = false;
+    while (!AvailableQueue->empty()) {
+      SUnit *CurSUnit = AvailableQueue->pop();
+      
+      ScheduleHazardRecognizer::HazardType HT =
+        HazardRec->getHazardType(CurSUnit);
+      if (HT == ScheduleHazardRecognizer::NoHazard) {
+        FoundSUnit = CurSUnit;
+        break;
+      }
+    
+      // Remember if this is a noop hazard.
+      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+      
+      NotReady.push_back(CurSUnit);
+    }
+    
+    // Add the nodes that aren't ready back onto the available list.
+    if (!NotReady.empty()) {
+      AvailableQueue->push_all(NotReady);
+      NotReady.clear();
+    }
+
+    // If we found a node to schedule, do it now.
+    if (FoundSUnit) {
+      ScheduleNodeTopDown(FoundSUnit, CurCycle);
+      HazardRec->EmitInstruction(FoundSUnit);
+
+      // If this is a pseudo-op node, we don't want to increment the current
+      // cycle.
+      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
+        ++CurCycle;        
+    } else if (!HasNoopHazards) {
+      // Otherwise, we have a pipeline stall, but no other problem, just advance
+      // the current cycle and try again.
+      DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+      HazardRec->AdvanceCycle();
+      ++NumStalls;
+      ++CurCycle;
+    } else {
+      // Otherwise, we have no instructions to issue and we have instructions
+      // that will fault if we don't do this right.  This is the case for
+      // processors without pipeline interlocks and other cases.
+      DEBUG(dbgs() << "*** Emitting noop\n");
+      HazardRec->EmitNoop();
+      Sequence.push_back(0);   // NULL here means noop
+      ++NumNoops;
+      ++CurCycle;
+    }
+  }
+
+#ifndef NDEBUG
+  VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler with a
+/// new hazard recognizer. This scheduler takes ownership of the hazard
+/// recognizer and deletes it when done.
+ScheduleDAGSDNodes *
+llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  return new ScheduleDAGList(*IS->MF,
+                             new LatencyPriorityQueue(),
+                             IS->CreateTargetHazardRecognizer());
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000..3f1766d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,1596 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms.  The basic approach uses a priority
+// queue of available nodes to schedule.  One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds,    "Number of nodes unfolded");
+STATISTIC(NumDups,       "Number of duplicated nodes");
+STATISTIC(NumPRCopies,   "Number of physical register copies");
+
+static RegisterScheduler
+  burrListDAGScheduler("list-burr",
+                       "Bottom-up register reduction list scheduling",
+                       createBURRListDAGScheduler);
+static RegisterScheduler
+  tdrListrDAGScheduler("list-tdrr",
+                       "Top-down register reduction list scheduling",
+                       createTDRRListDAGScheduler);
+static RegisterScheduler
+  sourceListDAGScheduler("source",
+                         "Similar to list-burr but schedules in source "
+                         "order when possible",
+                         createSourceListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation.  This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+  /// it is top-down.
+  bool isBottomUp;
+
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  SchedulingPriorityQueue *AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other nodes
+  /// that modify the registers can be scheduled.
+  unsigned NumLiveRegs;
+  std::vector<SUnit*> LiveRegDefs;
+  std::vector<unsigned> LiveRegCycles;
+
+  /// Topo - A topological ordering for SUnits which permits fast IsReachable
+  /// and similar queries.
+  ScheduleDAGTopologicalSort Topo;
+
+public:
+  ScheduleDAGRRList(MachineFunction &mf,
+                    bool isbottomup,
+                    SchedulingPriorityQueue *availqueue)
+    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
+      AvailableQueue(availqueue), Topo(SUnits) {}
+
+  ~ScheduleDAGRRList() {
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+  /// IsReachable - Checks if SU is reachable from TargetSU.
+  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+    return Topo.IsReachable(SU, TargetSU);
+  }
+
+  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+  /// create a cycle.
+  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+    return Topo.WillCreateCycle(SU, TargetSU);
+  }
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+  void AddPred(SUnit *SU, const SDep &D) {
+    Topo.AddPred(SU, D.getSUnit());
+    SU->addPred(D);
+  }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+  void RemovePred(SUnit *SU, const SDep &D) {
+    Topo.RemovePred(SU, D.getSUnit());
+    SU->removePred(D);
+  }
+
+private:
+  void ReleasePred(SUnit *SU, const SDep *PredEdge);
+  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
+  void ReleaseSuccessors(SUnit *SU);
+  void CapturePred(SDep *PredEdge);
+  void ScheduleNodeBottomUp(SUnit*, unsigned);
+  void ScheduleNodeTopDown(SUnit*, unsigned);
+  void UnscheduleNodeBottomUp(SUnit*);
+  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+  SUnit *CopyAndMoveSuccessors(SUnit*);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
+  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+  void ListScheduleTopDown();
+  void ListScheduleBottomUp();
+
+
+  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+  /// Updates the topological ordering if required.
+  SUnit *CreateNewSUnit(SDNode *N) {
+    unsigned NumSUnits = SUnits.size();
+    SUnit *NewNode = NewSUnit(N);
+    // Update the topological ordering.
+    if (NewNode->NodeNum >= NumSUnits)
+      Topo.InitDAGTopologicalSorting();
+    return NewNode;
+  }
+
+  /// CreateClone - Creates a new SUnit from an existing one.
+  /// Updates the topological ordering if required.
+  SUnit *CreateClone(SUnit *N) {
+    unsigned NumSUnits = SUnits.size();
+    SUnit *NewNode = Clone(N);
+    // Update the topological ordering.
+    if (NewNode->NodeNum >= NumSUnits)
+      Topo.InitDAGTopologicalSorting();
+    return NewNode;
+  }
+
+  /// ForceUnitLatencies - Return true, since register-pressure-reducing
+  /// scheduling doesn't need actual latency information.
+  bool ForceUnitLatencies() const { return true; }
+};
+}  // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+  NumLiveRegs = 0;
+  LiveRegDefs.resize(TRI->getNumRegs(), NULL);  
+  LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+  // Build the scheduling graph.
+  BuildSchedGraph(NULL);
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+  Topo.InitDAGTopologicalSorting();
+
+  AvailableQueue->initNodes(SUnits);
+  
+  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+  if (isBottomUp)
+    ListScheduleBottomUp();
+  else
+    ListScheduleTopDown();
+  
+  AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+//  Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    PredSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --PredSU->NumSuccsLeft;
+
+  // If all the node's successors are scheduled, this node is ready
+  // to be scheduled. Ignore the special EntrySU node.
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+    PredSU->isAvailable = true;
+    AvailableQueue->push(PredSU);
+  }
+}
+
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+  // Bottom up: release predecessors
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    ReleasePred(SU, &*I);
+    if (I->isAssignedRegDep()) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can 
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      if (!LiveRegDefs[I->getReg()]) {
+        ++NumLiveRegs;
+        LiveRegDefs[I->getReg()] = I->getSUnit();
+        LiveRegCycles[I->getReg()] = CurCycle;
+      }
+    }
+  }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(SU->dump(this));
+
+  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+  SU->setHeightToAtLeast(CurCycle);
+  Sequence.push_back(SU);
+
+  ReleasePredecessors(SU, CurCycle);
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        assert(LiveRegDefs[I->getReg()] == SU &&
+               "Physical register dependency violated?");
+        --NumLiveRegs;
+        LiveRegDefs[I->getReg()] = NULL;
+        LiveRegCycles[I->getReg()] = 0;
+      }
+    }
+  }
+
+  SU->isScheduled = true;
+  AvailableQueue->ScheduledNode(SU);
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {  
+  SUnit *PredSU = PredEdge->getSUnit();
+  if (PredSU->isAvailable) {
+    PredSU->isAvailable = false;
+    if (!PredSU->isPending)
+      AvailableQueue->remove(PredSU);
+  }
+
+  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+  ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update
+/// its state and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+  DEBUG(SU->dump(this));
+
+  AvailableQueue->UnscheduledNode(SU);
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    CapturePred(&*I);
+    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+             "Physical register dependency violated?");
+      --NumLiveRegs;
+      LiveRegDefs[I->getReg()] = NULL;
+      LiveRegCycles[I->getReg()] = 0;
+    }
+  }
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (!LiveRegDefs[I->getReg()]) {
+        LiveRegDefs[I->getReg()] = SU;
+        ++NumLiveRegs;
+      }
+      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
+        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+    }
+  }
+
+  SU->setHeightDirty();
+  SU->isScheduled = false;
+  SU->isAvailable = true;
+  AvailableQueue->push(SU);
+}
+
+/// BacktrackBottomUp - Backtrack scheduling to the previous cycle specified in
+/// BtCycle in order to schedule a specific node.
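+/// For example (illustrative): with CurCycle at 7 and BtCycle at 3, the four
+/// most recently scheduled nodes are unscheduled and made available again.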
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
+                                          unsigned &CurCycle) {
+  SUnit *OldSU = NULL;
+  while (CurCycle > BtCycle) {
+    OldSU = Sequence.back();
+    Sequence.pop_back();
+    if (SU->isSucc(OldSU))
+      // Don't try to remove SU from AvailableQueue.
+      SU->isAvailable = false;
+    UnscheduleNodeBottomUp(OldSU);
+    --CurCycle;
+  }
+
+  assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+  ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getFlaggedNode()) {
+    if (SUNode->isOperandOf(N))
+      return true;
+  }
+  return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
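+/// If the node folds a load, it is first unfolded into separate load and
+/// compute nodes; otherwise the node is cloned outright. Returns NULL when
+/// neither is possible, e.g. if the node produces or consumes a flag.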
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+  if (SU->getNode()->getFlaggedNode())
+    return NULL;
+
+  SDNode *N = SU->getNode();
+  if (!N)
+    return NULL;
+
+  SUnit *NewSU;
+  bool TryUnfold = false;
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+    EVT VT = N->getValueType(i);
+    if (VT == MVT::Flag)
+      return NULL;
+    else if (VT == MVT::Other)
+      TryUnfold = true;
+  }
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDValue &Op = N->getOperand(i);
+    EVT VT = Op.getNode()->getValueType(Op.getResNo());
+    if (VT == MVT::Flag)
+      return NULL;
+  }
+
+  if (TryUnfold) {
+    SmallVector<SDNode*, 2> NewNodes;
+    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+      return NULL;
+
+    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+    N = NewNodes[1];
+    SDNode *LoadNode = NewNodes[0];
+    unsigned NumVals = N->getNumValues();
+    unsigned OldNumVals = SU->getNode()->getNumValues();
+    for (unsigned i = 0; i != NumVals; ++i)
+      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+                                   SDValue(LoadNode, 1));
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but has different alignment or volatility.
+    bool isNewLoad = true;
+    SUnit *LoadSU;
+    if (LoadNode->getNodeId() != -1) {
+      LoadSU = &SUnits[LoadNode->getNodeId()];
+      isNewLoad = false;
+    } else {
+      LoadSU = CreateNewSUnit(LoadNode);
+      LoadNode->setNodeId(LoadSU->NodeNum);
+      ComputeLatency(LoadSU);
+    }
+
+    NewSU = CreateNewSUnit(N);
+    assert(N->getNodeId() == -1 && "Node already inserted!");
+    N->setNodeId(NewSU->NodeNum);
+      
+    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+        NewSU->isTwoAddress = true;
+        break;
+      }
+    }
+    if (TID.isCommutable())
+      NewSU->isCommutable = true;
+    ComputeLatency(NewSU);
+
+    // Record all the edges to and from the old SU, by category.
+    SmallVector<SDep, 4> ChainPreds;
+    SmallVector<SDep, 4> ChainSuccs;
+    SmallVector<SDep, 4> LoadPreds;
+    SmallVector<SDep, 4> NodePreds;
+    SmallVector<SDep, 4> NodeSuccs;
+    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (I->isCtrl())
+        ChainPreds.push_back(*I);
+      else if (isOperandOf(I->getSUnit(), LoadNode))
+        LoadPreds.push_back(*I);
+      else
+        NodePreds.push_back(*I);
+    }
+    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+         I != E; ++I) {
+      if (I->isCtrl())
+        ChainSuccs.push_back(*I);
+      else
+        NodeSuccs.push_back(*I);
+    }
+
+    // Now assign edges to the newly-created nodes.
+    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
+      const SDep &Pred = ChainPreds[i];
+      RemovePred(SU, Pred);
+      if (isNewLoad)
+        AddPred(LoadSU, Pred);
+    }
+    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+      const SDep &Pred = LoadPreds[i];
+      RemovePred(SU, Pred);
+      if (isNewLoad)
+        AddPred(LoadSU, Pred);
+    }
+    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+      const SDep &Pred = NodePreds[i];
+      RemovePred(SU, Pred);
+      AddPred(NewSU, Pred);
+    }
+    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+      SDep D = NodeSuccs[i];
+      SUnit *SuccDep = D.getSUnit();
+      D.setSUnit(SU);
+      RemovePred(SuccDep, D);
+      D.setSUnit(NewSU);
+      AddPred(SuccDep, D);
+    }
+    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+      SDep D = ChainSuccs[i];
+      SUnit *SuccDep = D.getSUnit();
+      D.setSUnit(SU);
+      RemovePred(SuccDep, D);
+      if (isNewLoad) {
+        D.setSUnit(LoadSU);
+        AddPred(SuccDep, D);
+      }
+    } 
+
+    // Add a data dependency to reflect that NewSU reads the value defined
+    // by LoadSU.
+    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
+
+    if (isNewLoad)
+      AvailableQueue->addNode(LoadSU);
+    AvailableQueue->addNode(NewSU);
+
+    ++NumUnfolds;
+
+    if (NewSU->NumSuccsLeft == 0) {
+      NewSU->isAvailable = true;
+      return NewSU;
+    }
+    SU = NewSU;
+  }
+
+  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+  NewSU = CreateClone(SU);
+
+  // New SUnit has the exact same predecessors.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I)
+    if (!I->isArtificial())
+      AddPred(NewSU, *I);
+
+  // Only copy scheduled successors. Cut them from the old node's successor
+  // list and move them over.
+  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isArtificial())
+      continue;
+    SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->isScheduled) {
+      SDep D = *I;
+      D.setSUnit(NewSU);
+      AddPred(SuccSU, D);
+      D.setSUnit(SU);
+      DelDeps.push_back(std::make_pair(SuccSU, D));
+    }
+  }
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+    RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+  AvailableQueue->updateNode(SU);
+  AvailableQueue->addNode(NewSU);
+
+  ++NumDups;
+  return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
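+/// Roughly: CopyFromSU copies the value out of SrcRC into DestRC, CopyToSU
+/// copies it back, and the already-scheduled users are rerouted to the last
+/// copy, freeing up the original register in between.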
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+                                               const TargetRegisterClass *DestRC,
+                                               const TargetRegisterClass *SrcRC,
+                                               SmallVector<SUnit*, 2> &Copies) {
+  SUnit *CopyFromSU = CreateNewSUnit(NULL);
+  CopyFromSU->CopySrcRC = SrcRC;
+  CopyFromSU->CopyDstRC = DestRC;
+
+  SUnit *CopyToSU = CreateNewSUnit(NULL);
+  CopyToSU->CopySrcRC = DestRC;
+  CopyToSU->CopyDstRC = SrcRC;
+
+  // Only copy scheduled successors. Cut them from the old node's successor
+  // list and move them over.
+  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isArtificial())
+      continue;
+    SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->isScheduled) {
+      SDep D = *I;
+      D.setSUnit(CopyToSU);
+      AddPred(SuccSU, D);
+      DelDeps.push_back(std::make_pair(SuccSU, *I));
+    }
+  }
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+    RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+  AvailableQueue->updateNode(SU);
+  AvailableQueue->addNode(CopyFromSU);
+  AvailableQueue->addNode(CopyToSU);
+  Copies.push_back(CopyFromSU);
+  Copies.push_back(CopyToSU);
+
+  ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
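+/// For example (illustrative): for a node with one explicit def and implicit
+/// defs {EFLAGS, EAX}, querying EAX returns the node's value type at result
+/// index 2.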
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+                                 const TargetInstrInfo *TII) {
+  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+  unsigned NumRes = TID.getNumDefs();
+  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+    if (Reg == *ImpDef)
+      break;
+    ++NumRes;
+  }
+  return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+                               std::vector<SUnit*> &LiveRegDefs,
+                               SmallSet<unsigned, 4> &RegAdded,
+                               SmallVector<unsigned, 4> &LRegs,
+                               const TargetRegisterInfo *TRI) {
+  bool Added = false;
+  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
+    if (RegAdded.insert(Reg)) {
+      LRegs.push_back(Reg);
+      Added = true;
+    }
+  }
+  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+      if (RegAdded.insert(*Alias)) {
+        LRegs.push_back(*Alias);
+        Added = true;
+      }
+    }
+  return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
+                                                 SmallVector<unsigned, 4> &LRegs){
+  if (NumLiveRegs == 0)
+    return false;
+
+  SmallSet<unsigned, 4> RegAdded;
+  // If this node would clobber any "live" register, then it's not ready.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep())
+      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+                         RegAdded, LRegs, TRI);
+  }
+
+  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+    if (Node->getOpcode() == ISD::INLINEASM) {
+      // Inline asm can clobber physical defs.
+      unsigned NumOps = Node->getNumOperands();
+      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+        --NumOps;  // Ignore the flag operand.
+
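+      // Each operand group begins with a flag word. As decoded below, its low
+      // 3 bits give the operand kind (2 and 6 denote register defs) and bits
+      // 3-15 give the number of operands that follow.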
+      for (unsigned i = 2; i != NumOps;) {
+        unsigned Flags =
+          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+        unsigned NumVals = (Flags & 0xffff) >> 3;
+
+        ++i; // Skip the ID value.
+        if ((Flags & 7) == 2 || (Flags & 7) == 6) {
+          // Check for def of register or earlyclobber register.
+          for (; NumVals; --NumVals, ++i) {
+            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+            if (TargetRegisterInfo::isPhysicalRegister(Reg))
+              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+          }
+        } else
+          i += NumVals;
+      }
+      continue;
+    }
+
+    if (!Node->isMachineOpcode())
+      continue;
+    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+    if (!TID.ImplicitDefs)
+      continue;
+    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
+      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+  }
+  return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+  unsigned CurCycle = 0;
+
+  // Release any predecessors of the special Exit node.
+  ReleasePredecessors(&ExitSU, CurCycle);
+
+  // Add root to Available queue.
+  if (!SUnits.empty()) {
+    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+    RootSU->isAvailable = true;
+    AvailableQueue->push(RootSU);
+  }
+
+  // While the Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready, put it back; otherwise schedule the node.
+  SmallVector<SUnit*, 4> NotReady;
+  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue->empty()) {
+    bool Delayed = false;
+    LRegsMap.clear();
+    SUnit *CurSU = AvailableQueue->pop();
+    while (CurSU) {
+      SmallVector<unsigned, 4> LRegs;
+      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+        break;
+      Delayed = true;
+      LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
+      NotReady.push_back(CurSU);
+      CurSU = AvailableQueue->pop();
+    }
+
+    // All candidates are delayed due to live physical reg dependencies.
+    // Try backtracking, code duplication, or inserting cross class copies
+    // to resolve it.
+    if (Delayed && !CurSU) {
+      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+        SUnit *TrySU = NotReady[i];
+        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+        // Try unscheduling up to the point where it's safe to schedule
+        // this node.
+        unsigned LiveCycle = CurCycle;
+        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+          unsigned Reg = LRegs[j];
+          unsigned LCycle = LiveRegCycles[Reg];
+          LiveCycle = std::min(LiveCycle, LCycle);
+        }
+        SUnit *OldSU = Sequence[LiveCycle];
+        if (!WillCreateCycle(TrySU, OldSU))  {
+          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
+          // Force the current node to be scheduled before the node that
+          // requires the physical reg dep.
+          if (OldSU->isAvailable) {
+            OldSU->isAvailable = false;
+            AvailableQueue->remove(OldSU);
+          }
+          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
+                              /*Reg=*/0, /*isNormalMemory=*/false,
+                              /*isMustAlias=*/false, /*isArtificial=*/true));
+          // If one or more successors have been unscheduled, then the current
+          // node is no longer available. Schedule a successor that's now
+          // available instead.
+          if (!TrySU->isAvailable)
+            CurSU = AvailableQueue->pop();
+          else {
+            CurSU = TrySU;
+            TrySU->isPending = false;
+            NotReady.erase(NotReady.begin()+i);
+          }
+          break;
+        }
+      }
+
+      if (!CurSU) {
+        // Can't backtrack. If it's too expensive to copy the value, then try
+        // to duplicate the nodes that produce these "too expensive to copy"
+        // values to break the dependency. If even that doesn't work, insert
+        // cross-class copies.
+        // If it's not too expensive, i.e. cost != -1, issue copies.
+        SUnit *TrySU = NotReady[0];
+        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+        assert(LRegs.size() == 1 && "Can't handle this yet!");
+        unsigned Reg = LRegs[0];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getPhysicalRegisterRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is null, then it must be possible
+        // to copy the value directly. Do not try to duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
+        if (!NewDef) {
+          // Issue copies, these can be expensive cross register class copies.
+          SmallVector<SUnit*, 2> Copies;
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
+                       << " to SU #" << Copies.front()->NodeNum << "\n");
+          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+                              /*Reg=*/0, /*isNormalMemory=*/false,
+                              /*isMustAlias=*/false,
+                              /*isArtificial=*/true));
+          NewDef = Copies.back();
+        }
+
+        DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
+                     << " to SU #" << TrySU->NodeNum << "\n");
+        LiveRegDefs[Reg] = NewDef;
+        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+                             /*Reg=*/0, /*isNormalMemory=*/false,
+                             /*isMustAlias=*/false,
+                             /*isArtificial=*/true));
+        TrySU->isAvailable = false;
+        CurSU = NewDef;
+      }
+
+      assert(CurSU && "Unable to resolve live physical register dependencies!");
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      // May no longer be available due to backtracking.
+      if (NotReady[i]->isAvailable)
+        AvailableQueue->push(NotReady[i]);
+    }
+    NotReady.clear();
+
+    if (CurSU)
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+    ++CurCycle;
+  }
+
+  // Reverse the order since the nodes were scheduled bottom-up.
+  std::reverse(Sequence.begin(), Sequence.end());
+  
+#ifndef NDEBUG
+  VerifySchedule(isBottomUp);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
+  SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    SuccSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --SuccSU->NumPredsLeft;
+
+  // If all the node's predecessors are scheduled, this node is ready
+  // to be scheduled. Ignore the special ExitSU node.
+  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+    SuccSU->isAvailable = true;
+    AvailableQueue->push(SuccSU);
+  }
+}
+
+void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
+  // Top down: release successors
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    assert(!I->isAssignedRegDep() &&
+           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
+
+    ReleaseSucc(SU, &*I);
+  }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(SU->dump(this));
+
+  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
+  Sequence.push_back(SU);
+
+  ReleaseSuccessors(SU);
+  SU->isScheduled = true;
+  AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+
+  // Release any successors of the special Entry node.
+  ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.empty()) {
+      AvailableQueue->push(&SUnits[i]);
+      SUnits[i].isAvailable = true;
+    }
+  }
+  
+  // While the Available queue is not empty, grab the node with the highest
+  // priority and schedule it.
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue->empty()) {
+    SUnit *CurSU = AvailableQueue->pop();
+    
+    if (CurSU)
+      ScheduleNodeTopDown(CurSU, CurCycle);
+    ++CurCycle;
+  }
+  
+#ifndef NDEBUG
+  VerifySchedule(isBottomUp);
+#endif
+}
+
+
+//===----------------------------------------------------------------------===//
+//                RegReductionPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+// 
+namespace {
+  template<class SF>
+  class RegReductionPriorityQueue;
+  
+  /// Sorting functions for the Available queue.
+  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
+    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
+    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+
+  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
+    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
+    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+
+  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
+    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
+      : SPQ(spq) {}
+    src_ls_rr_sort(const src_ls_rr_sort &RHS)
+      : SPQ(RHS.SPQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+}  // end anonymous namespace
+
+/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number for a node.
+/// The smaller the number, the higher the priority.
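+/// For illustration: a leaf gets number 1; a node whose data predecessors
+/// have numbers {1, 1} gets 1+1 = 2 (each tie adds one), while a node with
+/// predecessors numbered {1, 2} simply gets the maximum, 2.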
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+  if (SethiUllmanNumber != 0)
+    return SethiUllmanNumber;
+
+  unsigned Extra = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    SUnit *PredSU = I->getSUnit();
+    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+    if (PredSethiUllman > SethiUllmanNumber) {
+      SethiUllmanNumber = PredSethiUllman;
+      Extra = 0;
+    } else if (PredSethiUllman == SethiUllmanNumber)
+      ++Extra;
+  }
+
+  SethiUllmanNumber += Extra;
+
+  if (SethiUllmanNumber == 0)
+    SethiUllmanNumber = 1;
+  
+  return SethiUllmanNumber;
+}
+
+namespace {
+  template<class SF>
+  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
+    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
+    unsigned currentQueueId;
+
+  protected:
+    // SUnits - The SUnits for the current graph.
+    std::vector<SUnit> *SUnits;
+    
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    ScheduleDAGRRList *scheduleDAG;
+
+    // SethiUllmanNumbers - The SethiUllman number for each node.
+    std::vector<unsigned> SethiUllmanNumbers;
+
+  public:
+    RegReductionPriorityQueue(const TargetInstrInfo *tii,
+                              const TargetRegisterInfo *tri)
+      : Queue(SF(this)), currentQueueId(0),
+        TII(tii), TRI(tri), scheduleDAG(NULL) {}
+    
+    void initNodes(std::vector<SUnit> &sunits) {
+      SUnits = &sunits;
+      // Add pseudo dependency edges for two-address nodes.
+      AddPseudoTwoAddrDeps();
+      // Reroute edges to nodes with multiple uses.
+      PrescheduleNodesWithMultipleUses();
+      // Calculate node priorities.
+      CalculateSethiUllmanNumbers();
+    }
+
+    void addNode(const SUnit *SU) {
+      unsigned SUSize = SethiUllmanNumbers.size();
+      if (SUnits->size() > SUSize)
+        SethiUllmanNumbers.resize(SUSize*2, 0);
+      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+    }
+
+    void updateNode(const SUnit *SU) {
+      SethiUllmanNumbers[SU->NodeNum] = 0;
+      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+    }
+
+    void releaseState() {
+      SUnits = 0;
+      SethiUllmanNumbers.clear();
+    }
+
+    unsigned getNodePriority(const SUnit *SU) const {
+      assert(SU->NodeNum < SethiUllmanNumbers.size());
+      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+        // CopyToReg should be close to its uses to facilitate coalescing and
+        // avoid spilling.
+        return 0;
+      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+          Opc == TargetOpcode::SUBREG_TO_REG ||
+          Opc == TargetOpcode::INSERT_SUBREG)
+        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+        // close to their uses to facilitate coalescing.
+        return 0;
+      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+        // If SU does not have a register use, i.e. it doesn't produce a value
+        // that would be consumed (e.g. store), then it terminates a chain of
+        // computation.  Give it a large SethiUllman number so it will be
+        // scheduled right before its predecessors that it doesn't lengthen
+        // their live ranges.
+        return 0xffff;
+      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+        // If SU does not have a register def, schedule it close to its uses
+        // because it does not lengthen any live ranges.
+        return 0;
+      return SethiUllmanNumbers[SU->NodeNum];
+    }
+
+    unsigned getNodeOrdering(const SUnit *SU) const {
+      return scheduleDAG->DAG->GetOrdering(SU->getNode());
+    }
+    
+    unsigned size() const { return Queue.size(); }
+
+    bool empty() const { return Queue.empty(); }
+    
+    void push(SUnit *U) {
+      assert(!U->NodeQueueId && "Node in the queue already");
+      U->NodeQueueId = ++currentQueueId;
+      Queue.push(U);
+    }
+
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+        push(Nodes[i]);
+    }
+    
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.top();
+      Queue.pop();
+      V->NodeQueueId = 0;
+      return V;
+    }
+
+    void remove(SUnit *SU) {
+      assert(!Queue.empty() && "Queue is empty!");
+      assert(SU->NodeQueueId != 0 && "Not in queue!");
+      Queue.erase_one(SU);
+      SU->NodeQueueId = 0;
+    }
+
+    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 
+      scheduleDAG = scheduleDag; 
+    }
+
+  protected:
+    bool canClobber(const SUnit *SU, const SUnit *Op);
+    void AddPseudoTwoAddrDeps();
+    void PrescheduleNodesWithMultipleUses();
+    void CalculateSethiUllmanNumbers();
+  };
+
+  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+    BURegReductionPriorityQueue;
+
+  typedef RegReductionPriorityQueue<td_ls_rr_sort>
+    TDRegReductionPriorityQueue;
+
+  typedef RegReductionPriorityQueue<src_ls_rr_sort>
+    SrcRegReductionPriorityQueue;
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+  unsigned MaxHeight = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain succs
+    unsigned Height = I->getSUnit()->getHeight();
+    // If there is a bunch of CopyToRegs stacked up, they should be considered
+    // to be at the same position.
+    if (I->getSUnit()->getNode() &&
+        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+      Height = closestSucc(I->getSUnit())+1;
+    if (Height > MaxHeight)
+      MaxHeight = Height;
+  }
+  return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+  unsigned Scratches = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    Scratches++;
+  }
+  return Scratches;
+}
+
+template <typename RRSort>
+static bool BURRSort(const SUnit *left, const SUnit *right,
+                     const RegReductionPriorityQueue<RRSort> *SPQ) {
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  if (LPriority != RPriority)
+    return LPriority > RPriority;
+
+  // Try to schedule def + use closer together when Sethi-Ullman numbers are
+  // the same.
+  // e.g.
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // and the following instructions are both ready.
+  // t2 = op c3
+  // t4 = op c4
+  //
+  // Then schedule t2 = op first.
+  // i.e.
+  // t4 = op c4
+  // t2 = op c3
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // This creates more short live intervals.
+  unsigned LDist = closestSucc(left);
+  unsigned RDist = closestSucc(right);
+  if (LDist != RDist)
+    return LDist < RDist;
+
+  // How many registers become live when the node is scheduled.
+  unsigned LScratch = calcMaxScratches(left);
+  unsigned RScratch = calcMaxScratches(right);
+  if (LScratch != RScratch)
+    return LScratch > RScratch;
+
+  if (left->getHeight() != right->getHeight())
+    return left->getHeight() > right->getHeight();
+  
+  if (left->getDepth() != right->getDepth())
+    return left->getDepth() < right->getDepth();
+
+  assert(left->NodeQueueId && right->NodeQueueId && 
+         "NodeQueueId cannot be zero");
+  return (left->NodeQueueId > right->NodeQueueId);
+}
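+
+// In summary, BURRSort orders by Sethi-Ullman priority and breaks ties by
+// distance to the closest successor (closer is preferred), then the scratch
+// register estimate, then height, then depth, and finally queue order.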
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+  unsigned LOrder = SPQ->getNodeOrdering(left);
+  unsigned ROrder = SPQ->getNodeOrdering(right);
+
+  // Prefer an ordering where a lower non-zero order number means a higher
+  // preference.
+  if ((LOrder || ROrder) && LOrder != ROrder)
+    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+  return BURRSort(left, right, SPQ);
+}
+
+template<class SF>
+bool
+RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+  if (SU->isTwoAddress) {
+    unsigned Opc = SU->getNode()->getMachineOpcode();
+    const TargetInstrDesc &TID = TII->get(Opc);
+    unsigned NumRes = TID.getNumDefs();
+    unsigned NumOps = TID.getNumOperands() - NumRes;
+    for (unsigned i = 0; i != NumOps; ++i) {
+      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
+        SDNode *DU = SU->getNode()->getOperand(i).getNode();
+        if (DU->getNodeId() != -1 &&
+            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// hasCopyToRegUse - Return true if SU has a value successor that is a
+/// CopyToReg node.
+static bool hasCopyToRegUse(const SUnit *SU) {
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    const SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
+      return true;
+  }
+  return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+                                  const TargetInstrInfo *TII,
+                                  const TargetRegisterInfo *TRI) {
+  SDNode *N = SuccSU->getNode();
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+  assert(ImpDefs && "Caller should check hasPhysRegDefs");
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getFlaggedNode()) {
+    if (!SUNode->isMachineOpcode())
+      continue;
+    const unsigned *SUImpDefs =
+      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+    if (!SUImpDefs)
+      return false;
+    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+      EVT VT = N->getValueType(i);
+      if (VT == MVT::Flag || VT == MVT::Other)
+        continue;
+      if (!N->hasAnyUseOfValue(i))
+        continue;
+      unsigned Reg = ImpDefs[i - NumDefs];
+      for (;*SUImpDefs; ++SUImpDefs) {
+        unsigned SUReg = *SUImpDefs;
+        if (TRI->regsOverlap(Reg, SUReg))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+///      N
+///    / |
+///   /  |
+///  U  store
+///  |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+///      N
+///      ||
+///      ||
+///     store
+///       |
+///       U
+///       |
+///      ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+  // Visit all the nodes in topological order, working top-down.
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    // For now, only look at nodes with no data successors, such as stores.
+    // These are especially important, due to the heuristics in
+    // getNodePriority for nodes with no data successors.
+    if (SU->NumSuccs != 0)
+      continue;
+    // For now, only look at nodes with exactly one data predecessor.
+    if (SU->NumPreds != 1)
+      continue;
+    // Avoid prescheduling copies to virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyToReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Locate the single data predecessor.
+    SUnit *PredSU = 0;
+    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+         EE = SU->Preds.end(); II != EE; ++II)
+      if (!II->isCtrl()) {
+        PredSU = II->getSUnit();
+        break;
+      }
+    assert(PredSU);
+
+    // Don't rewrite edges that carry physregs, because that requires additional
+    // support infrastructure.
+    if (PredSU->hasPhysRegDefs)
+      continue;
+    // Short-circuit the case where SU is PredSU's only data successor.
+    if (PredSU->NumSuccs == 1)
+      continue;
+    // Avoid prescheduling copies from virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyFromReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Perform checks on the successors of PredSU.
+    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+         EE = PredSU->Succs.end(); II != EE; ++II) {
+      SUnit *PredSuccSU = II->getSUnit();
+      if (PredSuccSU == SU) continue;
+      // If PredSU has another successor with no data successors, for
+      // now don't attempt to choose either over the other.
+      if (PredSuccSU->NumSuccs == 0)
+        goto outer_loop_continue;
+      // Don't break physical register dependencies.
+      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+          goto outer_loop_continue;
+      // Don't introduce graph cycles.
+      if (scheduleDAG->IsReachable(SU, PredSuccSU))
+        goto outer_loop_continue;
+    }
+
+    // Ok, the transformation is safe and the heuristics suggest it is
+    // profitable. Update the graph.
+    DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
+                 << " next to PredSU # " << PredSU->NodeNum
+                 << " to guide scheduling in the presence of multiple uses\n");
+    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+      SDep Edge = PredSU->Succs[i];
+      assert(!Edge.isAssignedRegDep());
+      SUnit *SuccSU = Edge.getSUnit();
+      if (SuccSU != SU) {
+        Edge.setSUnit(PredSU);
+        scheduleDAG->RemovePred(SuccSU, Edge);
+        scheduleDAG->AddPred(SU, Edge);
+        Edge.setSUnit(SU);
+        scheduleDAG->AddPred(SuccSU, Edge);
+        --i;
+      }
+    }
+  outer_loop_continue:;
+  }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
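+///
+/// For illustration: if a two-address node A both reads and clobbers a value
+/// that another node B merely reads, the artificial edge forces B above A in
+/// the final schedule, so B's read happens before A clobbers the value.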
+template<class SF>
+void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    if (!SU->isTwoAddress)
+      continue;
+
+    SDNode *Node = SU->getNode();
+    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+      continue;
+
+    unsigned Opc = Node->getMachineOpcode();
+    const TargetInstrDesc &TID = TII->get(Opc);
+    unsigned NumRes = TID.getNumDefs();
+    unsigned NumOps = TID.getNumOperands() - NumRes;
+    for (unsigned j = 0; j != NumOps; ++j) {
+      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+        continue;
+      SDNode *DU = SU->getNode()->getOperand(j).getNode();
+      if (DU->getNodeId() == -1)
+        continue;
+      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+      if (!DUSU) continue;
+      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+           E = DUSU->Succs.end(); I != E; ++I) {
+        if (I->isCtrl()) continue;
+        SUnit *SuccSU = I->getSUnit();
+        if (SuccSU == SU)
+          continue;
+        // Be conservative. Ignore if nodes aren't at roughly the same
+        // depth and height.
+        if (SuccSU->getHeight() < SU->getHeight() &&
+            (SU->getHeight() - SuccSU->getHeight()) > 1)
+          continue;
+        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+        // constrains whatever is using the copy, instead of the copy
+        // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+        while (SuccSU->Succs.size() == 1 &&
+               SuccSU->getNode()->isMachineOpcode() &&
+               SuccSU->getNode()->getMachineOpcode() ==
+                 TargetOpcode::COPY_TO_REGCLASS)
+          SuccSU = SuccSU->Succs.front().getSUnit();
+        // Don't constrain non-instruction nodes.
+        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+          continue;
+        // Don't constrain nodes with physical register defs if the
+        // predecessor can clobber them.
+        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+            continue;
+        }
+        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+        // these may be coalesced away. We want them close to their uses.
+        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+            SuccOpc == TargetOpcode::INSERT_SUBREG ||
+            SuccOpc == TargetOpcode::SUBREG_TO_REG)
+          continue;
+        if ((!canClobber(SuccSU, DUSU) ||
+             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+             (!SU->isCommutable && SuccSU->isCommutable)) &&
+            !scheduleDAG->IsReachable(SuccSU, SU)) {
+          DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
+                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
+                                        /*Reg=*/0, /*isNormalMemory=*/false,
+                                        /*isMustAlias=*/false,
+                                        /*isArtificial=*/true));
+        }
+      }
+    }
+  }
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+  SethiUllmanNumbers.assign(SUnits->size(), 0);
+  
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
+/// predecessors of the successors of the SUnit SU. Stop when the provided
+/// limit is exceeded.
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 
+                                                    unsigned Limit) {
+  unsigned Sum = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    const SUnit *SuccSU = I->getSUnit();
+    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
+         EE = SuccSU->Preds.end(); II != EE; ++II) {
+      SUnit *PredSU = II->getSUnit();
+      if (!PredSU->isScheduled)
+        if (++Sum > Limit)
+          return Sum;
+    }
+  }
+  return Sum;
+}
+
+
+// Top down
+bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
+  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
+  bool LIsFloater = LIsTarget && left->NumPreds == 0;
+  bool RIsFloater = RIsTarget && right->NumPreds == 0;
+  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
+  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
+
+  if (left->NumSuccs == 0 && right->NumSuccs != 0)
+    return false;
+  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+    return true;
+
+  if (LIsFloater)
+    LBonus -= 2;
+  if (RIsFloater)
+    RBonus -= 2;
+  if (left->NumSuccs == 1)
+    LBonus += 2;
+  if (right->NumSuccs == 1)
+    RBonus += 2;
+
+  if (LPriority+LBonus != RPriority+RBonus)
+    return LPriority+LBonus < RPriority+RBonus;
+
+  if (left->getDepth() != right->getDepth())
+    return left->getDepth() < right->getDepth();
+
+  if (left->NumSuccsLeft != right->NumSuccsLeft)
+    return left->NumSuccsLeft > right->NumSuccsLeft;
+
+  assert(left->NodeQueueId && right->NodeQueueId && 
+         "NodeQueueId cannot be zero");
+  return (left->NodeQueueId > right->NodeQueueId);
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  const TargetMachine &TM = IS->TM;
+  const TargetInstrInfo *TII = TM.getInstrInfo();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  
+  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
+
+  ScheduleDAGRRList *SD =
+    new ScheduleDAGRRList(*IS->MF, true, PQ);
+  PQ->setScheduleDAG(SD);
+  return SD;  
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  const TargetMachine &TM = IS->TM;
+  const TargetInstrInfo *TII = TM.getInstrInfo();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  
+  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
+
+  ScheduleDAGRRList *SD =
+    new ScheduleDAGRRList(*IS->MF, false, PQ);
+  PQ->setScheduleDAG(SD);
+  return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  const TargetMachine &TM = IS->TM;
+  const TargetInstrInfo *TII = TM.getInstrInfo();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  
+  SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
+
+  ScheduleDAGRRList *SD =
+    new ScheduleDAGRRList(*IS->MF, true, PQ);
+  PQ->setScheduleDAG(SD);
+  return SD;  
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 0000000..b51c61b
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,435 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGSDNodes class, which is the base class
+// used by the SelectionDAG-based instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+  : ScheduleDAG(mf) {
+}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
+                             MachineBasicBlock::iterator insertPos) {
+  DAG = dag;
+  ScheduleDAG::Run(bb, insertPos);
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+  SUnit *SU = NewSUnit(Old->getNode());
+  SU->OrigNode = Old->OrigNode;
+  SU->Latency = Old->Latency;
+  SU->isTwoAddress = Old->isTwoAddress;
+  SU->isCommutable = Old->isCommutable;
+  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+  SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+  Old->isCloned = true;
+  return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
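+/// A typical case (illustrative) is a CopyToReg whose source operand is a
+/// physical register implicitly defined by the machine node, e.g. a flags
+/// register produced by a compare.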
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+                                      const TargetRegisterInfo *TRI, 
+                                      const TargetInstrInfo *TII,
+                                      unsigned &PhysReg, int &Cost) {
+  if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+    return;
+
+  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+  if (TargetRegisterInfo::isVirtualRegister(Reg))
+    return;
+
+  unsigned ResNo = User->getOperand(2).getResNo();
+  if (Def->isMachineOpcode()) {
+    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+    if (ResNo >= II.getNumDefs() &&
+        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+      PhysReg = Reg;
+      const TargetRegisterClass *RC =
+        TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+      Cost = RC->getCopyCost();
+    }
+  }
+}
+
+static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
+                     SelectionDAG *DAG) {
+  SmallVector<EVT, 4> VTs;
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+    VTs.push_back(N->getValueType(i));
+  if (AddFlag)
+    VTs.push_back(MVT::Flag);
+  SmallVector<SDValue, 4> Ops;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    Ops.push_back(N->getOperand(i));
+  if (Flag.getNode())
+    Ops.push_back(Flag);
+  SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+  DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
+/// This function finds loads from the same base pointer at different offsets.
+/// If the offsets are not far apart (target specific), it adds MVT::Flag
+/// inputs and outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
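+///
+/// For example (illustrative): three loads off the same base at offsets 0, 4,
+/// and 8 would be glued into a flag chain so that they issue back to back in
+/// increasing-address order.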
+void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
+  SmallPtrSet<SDNode*, 16> Visited;
+  SmallVector<int64_t, 4> Offsets;
+  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    SDNode *Node = &*NI;
+    if (!Node || !Node->isMachineOpcode())
+      continue;
+
+    unsigned Opc = Node->getMachineOpcode();
+    const TargetInstrDesc &TID = TII->get(Opc);
+    if (!TID.mayLoad())
+      continue;
+
+    SDNode *Chain = 0;
+    unsigned NumOps = Node->getNumOperands();
+    if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+      Chain = Node->getOperand(NumOps-1).getNode();
+    if (!Chain)
+      continue;
+
+    // Look for other loads of the same chain. Find loads that are loading from
+    // the same base pointer and different offsets.
+    Visited.clear();
+    Offsets.clear();
+    O2SMap.clear();
+    bool Cluster = false;
+    SDNode *Base = Node;
+    int64_t BaseOffset;
+    for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+         I != E; ++I) {
+      SDNode *User = *I;
+      if (User == Node || !Visited.insert(User))
+        continue;
+      int64_t Offset1, Offset2;
+      if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+          Offset1 == Offset2)
+        // FIXME: Should be ok if the addresses are identical. But earlier
+        // optimizations really should have eliminated one of the loads.
+        continue;
+      if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+        Offsets.push_back(Offset1);
+      O2SMap.insert(std::make_pair(Offset2, User));
+      Offsets.push_back(Offset2);
+      if (Offset2 < Offset1) {
+        Base = User;
+        BaseOffset = Offset2;
+      } else {
+        BaseOffset = Offset1;
+      }
+      Cluster = true;
+    }
+
+    if (!Cluster)
+      continue;
+
+    // Sort them in increasing order.
+    std::sort(Offsets.begin(), Offsets.end());
+
+    // Check if the loads are close enough.
+    SmallVector<SDNode*, 4> Loads;
+    unsigned NumLoads = 0;
+    int64_t BaseOff = Offsets[0];
+    SDNode *BaseLoad = O2SMap[BaseOff];
+    Loads.push_back(BaseLoad);
+    for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+      int64_t Offset = Offsets[i];
+      SDNode *Load = O2SMap[Offset];
+      if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
+                                        NumLoads))
+        break; // Stop right here. Ignore loads that are further away.
+      Loads.push_back(Load);
+      ++NumLoads;
+    }
+
+    if (NumLoads == 0)
+      continue;
+
+    // Cluster loads by adding MVT::Flag outputs and inputs. This also
+    // ensures they are scheduled in order of increasing addresses.
+    SDNode *Lead = Loads[0];
+    AddFlags(Lead, SDValue(0,0), true, DAG);
+    SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
+    for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
+      bool OutFlag = i < e-1;
+      SDNode *Load = Loads[i];
+      AddFlags(Load, InFlag, OutFlag, DAG);
+      if (OutFlag)
+        InFlag = SDValue(Load, Load->getNumValues()-1);
+      ++LoadsClustered;
+    }
+  }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+  // During scheduling, the NodeId field of SDNode is used to map SDNodes
+  // to their associated SUnits by holding SUnits table indices. A value
+  // of -1 means the SDNode does not yet have an associated SUnit.
+  unsigned NumNodes = 0;
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    NI->setNodeId(-1);
+    ++NumNodes;
+  }
+
+  // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+  // invalidated.
+  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+  // This is a temporary workaround.
+  SUnits.reserve(NumNodes * 2);
+  
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
+      continue;
+    
+    // If this node has already been processed, stop now.
+    if (NI->getNodeId() != -1) continue;
+    
+    SUnit *NodeSUnit = NewSUnit(NI);
+    
+    // See if anything is flagged to this node; if so, add it to the flagged
+    // nodes.  Nodes can have at most one flag input and one flag output.  Flags
+    // are required to be the last operand and result of a node.
+    
+    // Scan up to find flagged preds.
+    SDNode *N = NI;
+    while (N->getNumOperands() &&
+           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+      N = N->getOperand(N->getNumOperands()-1).getNode();
+      assert(N->getNodeId() == -1 && "Node already inserted!");
+      N->setNodeId(NodeSUnit->NodeNum);
+    }
+    
+    // Scan down to find any flagged succs.
+    N = NI;
+    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+      SDValue FlagVal(N, N->getNumValues()-1);
+      
+      // There are either zero or one users of the Flag result.
+      bool HasFlagUse = false;
+      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 
+           UI != E; ++UI)
+        if (FlagVal.isOperandOf(*UI)) {
+          HasFlagUse = true;
+          assert(N->getNodeId() == -1 && "Node already inserted!");
+          N->setNodeId(NodeSUnit->NodeNum);
+          N = *UI;
+          break;
+        }
+      if (!HasFlagUse) break;
+    }
+    
+    // If there are flag operands involved, N is now the bottom-most node
+    // of the sequence of nodes that are flagged together.
+    // Update the SUnit.
+    NodeSUnit->setNode(N);
+    assert(N->getNodeId() == -1 && "Node already inserted!");
+    N->setNodeId(NodeSUnit->NodeNum);
+
+    // Assign the Latency field of NodeSUnit using target-provided information.
+    if (UnitLatencies)
+      NodeSUnit->Latency = 1;
+    else
+      ComputeLatency(NodeSUnit);
+  }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
+  // Pass 2: add the preds, succs, etc.
+  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+    SUnit *SU = &SUnits[su];
+    SDNode *MainNode = SU->getNode();
+    
+    if (MainNode->isMachineOpcode()) {
+      unsigned Opc = MainNode->getMachineOpcode();
+      const TargetInstrDesc &TID = TII->get(Opc);
+      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+          SU->isTwoAddress = true;
+          break;
+        }
+      }
+      if (TID.isCommutable())
+        SU->isCommutable = true;
+    }
+    
+    // Find all predecessors and successors of the group.
+    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+      if (N->isMachineOpcode() &&
+          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+        SU->hasPhysRegClobbers = true;
+        unsigned NumUsed = InstrEmitter::CountResults(N);
+        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+          --NumUsed;    // Skip over unused values at the end.
+        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+          SU->hasPhysRegDefs = true;
+      }
+      
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+        SDNode *OpN = N->getOperand(i).getNode();
+        if (isPassiveNode(OpN)) continue;   // Not scheduled.
+        SUnit *OpSU = &SUnits[OpN->getNodeId()];
+        assert(OpSU && "Node has no SUnit!");
+        if (OpSU == SU) continue;           // In the same group.
+
+        EVT OpVT = N->getOperand(i).getValueType();
+        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+        bool isChain = OpVT == MVT::Other;
+
+        unsigned PhysReg = 0;
+        int Cost = 1;
+        // Determine if this is a physical register dependency.
+        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+        assert((PhysReg == 0 || !isChain) &&
+               "Chain dependence via physreg data?");
+        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+        // emits a copy from the physical register to a virtual register unless
+        // it requires a cross class copy (cost < 0). That means we are only
+        // treating "expensive to copy" register dependency as physical register
+        // dependency. This may change in the future though.
+        if (Cost >= 0)
+          PhysReg = 0;
+
+        const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+                               OpSU->Latency, PhysReg);
+        if (!isChain && !UnitLatencies) {
+          ComputeOperandLatency(OpSU, SU, (SDep &)dep);
+          ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+        }
+
+        SU->addPred(dep);
+      }
+    }
+  }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection dag we are
+/// given as input.  This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// flagged-together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+  // Cluster loads from "near" addresses into combined SUnits.
+  ClusterNeighboringLoads();
+  // Populate the SUnits array.
+  BuildSchedUnits();
+  // Compute all the scheduling dependencies between nodes.
+  AddSchedEdges();
+}
+
+void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  
+  // Compute the latency for the node.  We use the sum of the latencies for
+  // all nodes flagged together into this SUnit.
+  SU->Latency = 0;
+  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
+    if (N->isMachineOpcode()) {
+      SU->Latency += InstrItins.
+        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
+    }
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+  if (!SU->getNode()) {
+    dbgs() << "PHYS REG COPY\n";
+    return;
+  }
+
+  SU->getNode()->dump(DAG);
+  dbgs() << "\n";
+  SmallVector<SDNode *, 4> FlaggedNodes;
+  for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
+    FlaggedNodes.push_back(N);
+  while (!FlaggedNodes.empty()) {
+    dbgs() << "    ";
+    FlaggedNodes.back()->dump(DAG);
+    dbgs() << "\n";
+    FlaggedNodes.pop_back();
+  }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+  InstrEmitter Emitter(BB, InsertPos);
+  DenseMap<SDValue, unsigned> VRBaseMap;
+  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    SUnit *SU = Sequence[i];
+    if (!SU) {
+      // Null SUnit* is a noop.
+      EmitNoop();
+      continue;
+    }
+
+    // For pre-regalloc scheduling, create instructions corresponding to the
+    // SDNode and any flagged SDNodes and append them to the block.
+    if (!SU->getNode()) {
+      // Emit a copy.
+      EmitPhysRegCopy(SU, CopyVRBaseMap);
+      continue;
+    }
+
+    SmallVector<SDNode *, 4> FlaggedNodes;
+    for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
+         N = N->getFlaggedNode())
+      FlaggedNodes.push_back(N);
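+    // getFlaggedNode follows the flag operand, so FlaggedNodes holds the
+    // group's predecessors; emitting it back to front produces the nodes
+    // in top-down order, with SU's bottom-most node emitted last.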
+    while (!FlaggedNodes.empty()) {
+      Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+                       VRBaseMap, EM);
+      FlaggedNodes.pop_back();
+    }
+    Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
+                     VRBaseMap, EM);
+  }
+
+  BB = Emitter.getBlock();
+  InsertPos = Emitter.getInsertPos();
+  return BB;
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 0000000..6b829b6
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,121 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGSDNODES_H
+#define SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+  /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+  /// 
+  /// Edges between SUnits are initially based on edges in the SelectionDAG,
+  /// and additional edges can be added by the schedulers as heuristics.
+  /// SDNodes such as Constants, Registers, and a few others that are not
+  /// interesting to schedulers are not allocated SUnits.
+  ///
+  /// SDNodes with MVT::Flag operands are grouped along with the flagged
+  /// nodes into a single SUnit so that they are scheduled together.
+  ///
+  /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+  /// edges.  Physical register dependence information is not carried in
+  /// the DAG and must be handled explicitly by schedulers.
+  ///
+  class ScheduleDAGSDNodes : public ScheduleDAG {
+  public:
+    SelectionDAG *DAG;                    // DAG of the current basic block
+
+    explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+    virtual ~ScheduleDAGSDNodes() {}
+
+    /// Run - perform scheduling.
+    ///
+    void Run(SelectionDAG *dag, MachineBasicBlock *bb,
+             MachineBasicBlock::iterator insertPos);
+
+    /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+    ///
+    static bool isPassiveNode(SDNode *Node) {
+      if (isa<ConstantSDNode>(Node))       return true;
+      if (isa<ConstantFPSDNode>(Node))     return true;
+      if (isa<RegisterSDNode>(Node))       return true;
+      if (isa<GlobalAddressSDNode>(Node))  return true;
+      if (isa<BasicBlockSDNode>(Node))     return true;
+      if (isa<FrameIndexSDNode>(Node))     return true;
+      if (isa<ConstantPoolSDNode>(Node))   return true;
+      if (isa<JumpTableSDNode>(Node))      return true;
+      if (isa<ExternalSymbolSDNode>(Node)) return true;
+      if (isa<BlockAddressSDNode>(Node))   return true;
+      if (Node->getOpcode() == ISD::EntryToken) return true;
+      return false;
+    }
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+    ///
+    SUnit *NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+      const SUnit *Addr = 0;
+      if (!SUnits.empty())
+        Addr = &SUnits[0];
+#endif
+      SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+      assert((Addr == 0 || Addr == &SUnits[0]) &&
+             "SUnits std::vector reallocated on the fly!");
+      SUnits.back().OrigNode = &SUnits.back();
+      return &SUnits.back();
+    }
+
+    /// Clone - Creates a clone of the specified SUnit. It does not copy the
+    /// predecessor/successor info nor the temporary scheduling state.
+    ///
+    SUnit *Clone(SUnit *N);
+    
+    /// BuildSchedGraph - Build the SUnit graph from the selection dag we are
+    /// given as input.  This SUnit graph is similar to the SelectionDAG, but
+    /// excludes nodes that aren't interesting to scheduling, and represents
+    /// flagged-together nodes with a single SUnit.
+    virtual void BuildSchedGraph(AliasAnalysis *AA);
+
+    /// ComputeLatency - Compute node latency.
+    ///
+    virtual void ComputeLatency(SUnit *SU);
+
+    virtual MachineBasicBlock *
+    EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+
+    /// Schedule - Order nodes according to selected style, filling
+    /// in the Sequence member.
+    ///
+    virtual void Schedule() = 0;
+
+    virtual void dumpNode(const SUnit *SU) const;
+
+    virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+    virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+  private:
+    /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+    /// combined SUnits.
+    void ClusterNeighboringLoads();
+
+    /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+    void BuildSchedUnits();
+    void AddSchedEdges();
+  };
+}
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000..6122a2a
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,6325 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SDNodeOrdering.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
+  SDVTList Res = {VTs, NumVTs};
+  return Res;
+}
+
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unknown FP format");
+  case MVT::f32:     return &APFloat::IEEEsingle;
+  case MVT::f64:     return &APFloat::IEEEdouble;
+  case MVT::f80:     return &APFloat::x87DoubleExtended;
+  case MVT::f128:    return &APFloat::IEEEquad;
+  case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+  }
+}
+
+SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {}
+
+//===----------------------------------------------------------------------===//
+//                              ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+  return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
+                                           const APFloat& Val) {
+  assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+  // PPC long double cannot be converted to any other type.
+  if (VT == MVT::ppcf128 ||
+      &Val.getSemantics() == &APFloat::PPCDoubleDouble)
+    return false;
+
+  // convert modifies in place, so make a copy.
+  APFloat Val2 = APFloat(Val);
+  bool losesInfo;
+  (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+                      &losesInfo);
+  return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+//                              ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+  // Look through a bit convert.
+  if (N->getOpcode() == ISD::BIT_CONVERT)
+    N = N->getOperand(0).getNode();
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+  unsigned i = 0, e = N->getNumOperands();
+
+  // Skip over all of the undef values.
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+
+  // Do not accept build_vectors that aren't all constants or which have non-~0
+  // elements.
+  SDValue NotZero = N->getOperand(i);
+  if (isa<ConstantSDNode>(NotZero)) {
+    if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(NotZero)) {
+    if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
+                bitcastToAPInt().isAllOnesValue())
+      return false;
+  } else
+    return false;
+
+  // Okay, we have at least one ~0 value, check to see if the rest match or are
+  // undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != NotZero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+  // Look through a bit convert.
+  if (N->getOpcode() == ISD::BIT_CONVERT)
+    N = N->getOperand(0).getNode();
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+  unsigned i = 0, e = N->getNumOperands();
+
+  // Skip over all of the undef values.
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+
+  // Do not accept build_vectors that aren't all constants or which have non-0
+  // elements.
+  SDValue Zero = N->getOperand(i);
+  if (isa<ConstantSDNode>(Zero)) {
+    if (!cast<ConstantSDNode>(Zero)->isNullValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(Zero)) {
+    if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+      return false;
+  } else
+    return false;
+
+  // Okay, we have at least one 0 value, check to see if the rest match or are
+  // undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != Zero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+    return true;
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+  if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+    return false;
+  unsigned NumElems = N->getNumOperands();
+  for (unsigned i = 1; i < NumElems; ++i) {
+    SDValue V = N->getOperand(i);
+    if (V.getOpcode() != ISD::UNDEF)
+      return false;
+  }
+  return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+  // To perform this operation, we just need to swap the L and G bits of the
+  // operation.
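+  // For example, swapping the operands of SETLT yields SETGT, while SETEQ
+  // is unchanged since neither its L nor G bit is set.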
+  unsigned OldL = (Operation >> 2) & 1;
+  unsigned OldG = (Operation >> 1) & 1;
+  return ISD::CondCode((Operation & ~6) |  // Keep the N, U, E bits
+                       (OldL << 1) |       // New G bit
+                       (OldG << 2));       // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+  unsigned Operation = Op;
+  if (isInteger)
+    Operation ^= 7;   // Flip L, G, E bits, but not U.
+  else
+    Operation ^= 15;  // Flip all of the condition bits.
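+  // For example, the integer inverse of SETLT is SETGE, and the FP inverse
+  // of SETOLT is SETUGE.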
+
+  if (Operation > ISD::SETTRUE2)
+    Operation &= ~8;  // Don't let N and U bits get set.
+
+  return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if it is an unsigned comparison.  Return zero
+/// if the operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+  switch (Opcode) {
+  default: llvm_unreachable("Illegal integer setcc operation!");
+  case ISD::SETEQ:
+  case ISD::SETNE: return 0;
+  case ISD::SETLT:
+  case ISD::SETLE:
+  case ISD::SETGT:
+  case ISD::SETGE: return 1;
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUGT:
+  case ISD::SETUGE: return 2;
+  }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)).  This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                       bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed integer setcc with an unsigned integer setcc.
+    return ISD::SETCC_INVALID;
+
+  unsigned Op = Op1 | Op2;  // Combine all of the condition bits.
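+  // For example, for integers SETLT | SETGT combines the L and G bits,
+  // which yields SETNE.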
+
+  // If the N and U bits get set then the resultant comparison DOES suddenly
+  // care about orderedness, and is true when ordered.
+  if (Op > ISD::SETTRUE2)
+    Op &= ~16;     // Clear the N ("don't care") bit.
+
+  // Canonicalize illegal integer setcc's.
+  if (isInteger && Op == ISD::SETUNE)  // e.g. SETUGT | SETULT
+    Op = ISD::SETNE;
+
+  return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)).  This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                        bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed setcc with an unsigned setcc.
+    return ISD::SETCC_INVALID;
+
+  // Combine all of the condition bits.
+  ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
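+  // For example, for integers SETLE & SETGE keeps only the bits common to
+  // both, yielding SETEQ.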
+
+  // Canonicalize illegal integer setcc's.
+  if (isInteger) {
+    switch (Result) {
+    default: break;
+    case ISD::SETUO : Result = ISD::SETFALSE; break;  // SETUGT & SETULT
+    case ISD::SETOEQ:                                 // SETEQ  & SETU[LG]E
+    case ISD::SETUEQ: Result = ISD::SETEQ   ; break;  // SETUGE & SETULE
+    case ISD::SETOLT: Result = ISD::SETULT  ; break;  // SETULT & SETNE
+    case ISD::SETOGT: Result = ISD::SETUGT  ; break;  // SETUGT & SETNE
+    }
+  }
+
+  return Result;
+}
+
+const TargetMachine &SelectionDAG::getTarget() const {
+  return MF->getTarget();
+}
+
+//===----------------------------------------------------------------------===//
+//                           SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC)  {
+  ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+  ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+                              const SDValue *Ops, unsigned NumOps) {
+  for (; NumOps; --NumOps, ++Ops) {
+    ID.AddPointer(Ops->getNode());
+    ID.AddInteger(Ops->getResNo());
+  }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+                              const SDUse *Ops, unsigned NumOps) {
+  for (; NumOps; --NumOps, ++Ops) {
+    ID.AddPointer(Ops->getNode());
+    ID.AddInteger(Ops->getResNo());
+  }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+                          unsigned short OpC, SDVTList VTList,
+                          const SDValue *OpList, unsigned N) {
+  AddNodeIDOpcode(ID, OpC);
+  AddNodeIDValueTypes(ID, VTList);
+  AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
+/// the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+  switch (N->getOpcode()) {
+  case ISD::TargetExternalSymbol:
+  case ISD::ExternalSymbol:
+    llvm_unreachable("Should only be used on nodes with operands");
+  default: break;  // Normal nodes don't need extra info.
+  case ISD::TargetConstant:
+  case ISD::Constant:
+    ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+    break;
+  case ISD::TargetConstantFP:
+  case ISD::ConstantFP: {
+    ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+    break;
+  }
+  case ISD::TargetGlobalAddress:
+  case ISD::GlobalAddress:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::GlobalTLSAddress: {
+    const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+    ID.AddPointer(GA->getGlobal());
+    ID.AddInteger(GA->getOffset());
+    ID.AddInteger(GA->getTargetFlags());
+    break;
+  }
+  case ISD::BasicBlock:
+    ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+    break;
+  case ISD::Register:
+    ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+    break;
+
+  case ISD::SRCVALUE:
+    ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+    break;
+  case ISD::FrameIndex:
+  case ISD::TargetFrameIndex:
+    ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+    break;
+  case ISD::JumpTable:
+  case ISD::TargetJumpTable:
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+    break;
+  case ISD::ConstantPool:
+  case ISD::TargetConstantPool: {
+    const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+    ID.AddInteger(CP->getAlignment());
+    ID.AddInteger(CP->getOffset());
+    if (CP->isMachineConstantPoolEntry())
+      CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+    else
+      ID.AddPointer(CP->getConstVal());
+    ID.AddInteger(CP->getTargetFlags());
+    break;
+  }
+  case ISD::LOAD: {
+    const LoadSDNode *LD = cast<LoadSDNode>(N);
+    ID.AddInteger(LD->getMemoryVT().getRawBits());
+    ID.AddInteger(LD->getRawSubclassData());
+    break;
+  }
+  case ISD::STORE: {
+    const StoreSDNode *ST = cast<StoreSDNode>(N);
+    ID.AddInteger(ST->getMemoryVT().getRawBits());
+    ID.AddInteger(ST->getRawSubclassData());
+    break;
+  }
+  case ISD::ATOMIC_CMP_SWAP:
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX: {
+    const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+    ID.AddInteger(AT->getMemoryVT().getRawBits());
+    ID.AddInteger(AT->getRawSubclassData());
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+    for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+         i != e; ++i)
+      ID.AddInteger(SVN->getMaskElt(i));
+    break;
+  }
+  case ISD::TargetBlockAddress:
+  case ISD::BlockAddress: {
+    ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
+    ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
+    break;
+  }
+  } // end switch (N->getOpcode())
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+  AddNodeIDOpcode(ID, N->getOpcode());
+  // Add the return value info.
+  AddNodeIDValueTypes(ID, N->getVTList());
+  // Add the operand info.
+  AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+  AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, indexing mode, and
+/// extension/truncation information.
+///
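+/// The packed layout is ConvType in bits [1:0], AM in bits [4:2], and the
+/// volatile flag in bit 5.
+///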
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) {
+  assert((ConvType & 3) == ConvType &&
+         "ConvType may not require more than 2 bits!");
+  assert((AM & 7) == AM &&
+         "AM may not require more than 3 bits!");
+  return ConvType |
+         (AM << 2) |
+         (isVolatile << 5);
+}
+
+//===----------------------------------------------------------------------===//
+//                              SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+  if (N->getValueType(0) == MVT::Flag)
+    return true; // Never CSE anything that produces a flag.
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::HANDLENODE:
+  case ISD::EH_LABEL:
+    return true;   // Never CSE these nodes.
+  }
+
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return true; // Never CSE anything that produces a flag.
+
+  return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+  // Create a dummy node (which is not added to allnodes) that adds a reference
+  // to the root node, preventing it from being deleted.
+  HandleSDNode Dummy(getRoot());
+
+  SmallVector<SDNode*, 128> DeadNodes;
+
+  // Add all obviously-dead nodes to the DeadNodes worklist.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+    if (I->use_empty())
+      DeadNodes.push_back(I);
+
+  RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+  setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
+                                   DAGUpdateListener *UpdateListener) {
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.pop_back_val();
+
+    if (UpdateListener)
+      UpdateListener->NodeDeleted(N, 0);
+
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list.  This is safe to do, as there are
+    // no cycles in the graph.
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+      SDUse &Use = *I++;
+      SDNode *Operand = Use.getNode();
+      Use.set(SDValue());
+
+      // Now that we removed this operand, see if there are no uses of it left.
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+
+    DeallocateNode(N);
+  }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+  SmallVector<SDNode*, 16> DeadNodes(1, N);
+  RemoveDeadNodes(DeadNodes, UpdateListener);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+  // First take this out of the appropriate CSE map.
+  RemoveNodeFromCSEMaps(N);
+
+  // Finally, remove uses due to operands of this node, remove from the
+  // AllNodes list, and delete the node.
+  DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+  assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+  assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+  // Drop all of the operands and decrement used node's use counts.
+  N->DropOperands();
+
+  DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+  if (N->OperandsNeedDelete)
+    delete[] N->OperandList;
+
+  // Set the opcode to DELETED_NODE to help catch bugs when node
+  // memory is reallocated.
+  N->NodeType = ISD::DELETED_NODE;
+
+  NodeAllocator.Deallocate(AllNodes.remove(N));
+
+  // Remove the ordering of this node.
+  Ordering->remove(N);
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it.  This is useful when we're about to delete or repurpose
+/// the node.  We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+  bool Erased = false;
+  switch (N->getOpcode()) {
+  case ISD::EntryToken:
+    llvm_unreachable("EntryToken should not be in CSEMaps!");
+    return false;
+  case ISD::HANDLENODE: return false;  // noop.
+  case ISD::CONDCODE:
+    assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+           "Cond code doesn't exist!");
+    Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+    CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+    break;
+  case ISD::ExternalSymbol:
+    Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::TargetExternalSymbol: {
+    ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+    Erased = TargetExternalSymbols.erase(
+               std::pair<std::string,unsigned char>(ESN->getSymbol(),
+                                                    ESN->getTargetFlags()));
+    break;
+  }
+  case ISD::VALUETYPE: {
+    EVT VT = cast<VTSDNode>(N)->getVT();
+    if (VT.isExtended()) {
+      Erased = ExtendedValueTypeNodes.erase(VT);
+    } else {
+      Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
+      ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
+    }
+    break;
+  }
+  default:
+    // Remove it from the CSE Map.
+    Erased = CSEMap.RemoveNode(N);
+    break;
+  }
+#ifndef NDEBUG
+  // Verify that the node was actually in one of the CSE maps, unless it has a
+  // flag result (which cannot be CSE'd) or is one of the special cases that are
+  // not subject to CSE.
+  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+      !N->isMachineOpcode() && !doNotCSE(N)) {
+    N->dump(this);
+    dbgs() << "\n";
+    llvm_unreachable("Node is not in map!");
+  }
+#endif
+  return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
+                                       DAGUpdateListener *UpdateListener) {
+  // For node types that aren't CSE'd, just act as if no identical node
+  // already exists.
+  if (!doNotCSE(N)) {
+    SDNode *Existing = CSEMap.GetOrInsertNode(N);
+    if (Existing != N) {
+      // If there was already an existing matching node, use ReplaceAllUsesWith
+      // to replace the dead one with the existing one.  This can cause
+      // recursive merging of other unrelated nodes down the line.
+      ReplaceAllUsesWith(N, Existing, UpdateListener);
+
+      // N is now dead.  Inform the listener if it exists and delete it.
+      if (UpdateListener)
+        UpdateListener->NodeDeleted(N, Existing);
+      DeleteNodeNotInCSEMaps(N);
+      return;
+    }
+  }
+
+  // If the node doesn't already exist, we updated it.  Inform a listener if
+  // one exists.
+  if (UpdateListener)
+    UpdateListener->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified.  If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take.  If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+                                           void *&InsertPos) {
+  if (doNotCSE(N))
+    return 0;
+
+  SDValue Ops[] = { Op };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+  AddNodeIDCustom(ID, N);
+  SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+  return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified.  If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take.  If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+                                           SDValue Op1, SDValue Op2,
+                                           void *&InsertPos) {
+  if (doNotCSE(N))
+    return 0;
+
+  SDValue Ops[] = { Op1, Op2 };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+  AddNodeIDCustom(ID, N);
+  SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+  return Node;
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified.  If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take.  If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+                                           const SDValue *Ops,unsigned NumOps,
+                                           void *&InsertPos) {
+  if (doNotCSE(N))
+    return 0;
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+  AddNodeIDCustom(ID, N);
+  SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+  return Node;
+}
+
+/// VerifyNode - Sanity check the given node.  Aborts if it is invalid.
+void SelectionDAG::VerifyNode(SDNode *N) {
+  switch (N->getOpcode()) {
+  default:
+    break;
+  case ISD::BUILD_PAIR: {
+    EVT VT = N->getValueType(0);
+    assert(N->getNumValues() == 1 && "Too many results!");
+    assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+           "Wrong return type!");
+    assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+    assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+           "Mismatched operand types!");
+    assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+           "Wrong operand type!");
+    assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+           "Wrong return type size");
+    break;
+  }
+  case ISD::BUILD_VECTOR: {
+    assert(N->getNumValues() == 1 && "Too many results!");
+    assert(N->getValueType(0).isVector() && "Wrong return type!");
+    assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+           "Wrong number of operands!");
+    EVT EltVT = N->getValueType(0).getVectorElementType();
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+      assert((I->getValueType() == EltVT ||
+             (EltVT.isInteger() && I->getValueType().isInteger() &&
+              EltVT.bitsLE(I->getValueType()))) &&
+            "Wrong operand type!");
+    break;
+  }
+  }
+}
+
+/// getEVTAlignment - Compute the default alignment value for the
+/// given type.
+///
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+  const Type *Ty = VT == MVT::iPTR ?
+                   PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+                   VT.getTypeForEVT(*getContext());
+
+  return TLI.getTargetData()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
+  : TLI(tli), FLI(fli), DW(0),
+    EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(),
+              getVTList(MVT::Other)),
+    Root(getEntryNode()), Ordering(0) {
+  AllNodes.push_back(&EntryNode);
+  Ordering = new SDNodeOrdering();
+}
+
+void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
+                        DwarfWriter *dw) {
+  MF = &mf;
+  MMI = mmi;
+  DW = dw;
+  Context = &mf.getFunction()->getContext();
+}
+
+SelectionDAG::~SelectionDAG() {
+  allnodes_clear();
+  delete Ordering;
+}
+
+void SelectionDAG::allnodes_clear() {
+  assert(&*AllNodes.begin() == &EntryNode);
+  AllNodes.remove(AllNodes.begin());
+  while (!AllNodes.empty())
+    DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+  allnodes_clear();
+  OperandAllocator.Reset();
+  CSEMap.clear();
+
+  ExtendedValueTypeNodes.clear();
+  ExternalSymbols.clear();
+  TargetExternalSymbols.clear();
+  std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+            static_cast<CondCodeSDNode*>(0));
+  std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+            static_cast<SDNode*>(0));
+
+  EntryNode.UseList = 0;
+  AllNodes.push_back(&EntryNode);
+  Root = getEntryNode();
+  Ordering = new SDNodeOrdering();
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+  return VT.bitsGT(Op.getValueType()) ?
+    getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+    getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+  return VT.bitsGT(Op.getValueType()) ?
+    getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+    getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
+  assert(!VT.isVector() &&
+         "getZeroExtendInReg should use the vector element type instead of "
+         "the vector type!");
+  if (Op.getValueType() == VT) return Op;
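+  // E.g. zero-extending in-register from i8 within an i32 value computes
+  // (AND Op, 0xFF).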
+  unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+  APInt Imm = APInt::getLowBitsSet(BitWidth,
+                                   VT.getSizeInBits());
+  return getNode(ISD::AND, DL, Op.getValueType(), Op,
+                 getConstant(Imm, Op.getValueType()));
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
+  EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+  SDValue NegOne =
+    getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+  return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
+  EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+  assert((EltVT.getSizeInBits() >= 64 ||
+         (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+         "getConstant with a uint64_t value that doesn't fit in the type!");
+  return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
+  return getConstant(*ConstantInt::get(*Context, Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
+  assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+  EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+  assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
+         "APInt size does not match type size!");
+
+  unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+  ID.AddPointer(&Val);
+  void *IP = 0;
+  SDNode *N = NULL;
+  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+    if (!VT.isVector())
+      return SDValue(N, 0);
+
+  if (!N) {
+    N = NodeAllocator.Allocate<ConstantSDNode>();
+    new (N) ConstantSDNode(isT, &Val, EltVT);
+    CSEMap.InsertNode(N, IP);
+    AllNodes.push_back(N);
+  }
+
+  SDValue Result(N, 0);
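+  // For vector types, splat the scalar constant across every element, e.g.
+  // a v4i32 constant becomes a BUILD_VECTOR of four identical i32 nodes.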
+  if (VT.isVector()) {
+    SmallVector<SDValue, 8> Ops;
+    Ops.assign(VT.getVectorNumElements(), Result);
+    Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+                     VT, &Ops[0], Ops.size());
+  }
+  return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
+  return getConstant(Val, TLI.getPointerTy(), isTarget);
+}
+
+
+SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
+  return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
+  assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+  EVT EltVT =
+    VT.isVector() ? VT.getVectorElementType() : VT;
+
+  // Do the map lookup using the actual bit pattern for the floating point
+  // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+  // we don't have issues with SNANs.
+  unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+  ID.AddPointer(&V);
+  void *IP = 0;
+  SDNode *N = NULL;
+  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+    if (!VT.isVector())
+      return SDValue(N, 0);
+
+  if (!N) {
+    N = NodeAllocator.Allocate<ConstantFPSDNode>();
+    new (N) ConstantFPSDNode(isTarget, &V, EltVT);
+    CSEMap.InsertNode(N, IP);
+    AllNodes.push_back(N);
+  }
+
+  SDValue Result(N, 0);
+  if (VT.isVector()) {
+    SmallVector<SDValue, 8> Ops;
+    Ops.assign(VT.getVectorNumElements(), Result);
+    // FIXME DebugLoc info might be appropriate here
+    Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+                     VT, &Ops[0], Ops.size());
+  }
+  return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
+  EVT EltVT =
+    VT.isVector() ? VT.getVectorElementType() : VT;
+  if (EltVT==MVT::f32)
+    return getConstantFP(APFloat((float)Val), VT, isTarget);
+  else
+    return getConstantFP(APFloat(Val), VT, isTarget);
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+                                       EVT VT, int64_t Offset,
+                                       bool isTargetGA,
+                                       unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTargetGA) &&
+         "Cannot set target flags on target-independent globals");
+
+  // Truncate (with sign-extension) the offset value to the pointer size.
+  EVT PTy = TLI.getPointerTy();
+  unsigned BitWidth = PTy.getSizeInBits();
+  if (BitWidth < 64)
+    Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
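+  // E.g. with 32-bit pointers this shifts the offset up and back down by
+  // 32 bits, sign-extending bit 31 through the upper half.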
+
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+  if (!GVar) {
+    // If GV is an alias then use the aliasee for determining thread-localness.
+    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+      GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+  }
+
+  unsigned Opc;
+  if (GVar && GVar->isThreadLocal())
+    Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+  else
+    Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddPointer(GV);
+  ID.AddInteger(Offset);
+  ID.AddInteger(TargetFlags);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
+  new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
+  unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(FI);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>();
+  new (N) FrameIndexSDNode(FI, VT, isTarget);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
+                                   unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTarget) &&
+         "Cannot set target flags on target-independent jump tables");
+  unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(JTI);
+  ID.AddInteger(TargetFlags);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
+  new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT,
+                                      unsigned Alignment, int Offset,
+                                      bool isTarget,
+                                      unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTarget) &&
+         "Cannot set target flags on target-independent globals");
+  if (Alignment == 0)
+    Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(Offset);
+  ID.AddPointer(C);
+  ID.AddInteger(TargetFlags);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
+                                      unsigned Alignment, int Offset,
+                                      bool isTarget,
+                                      unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTarget) &&
+         "Cannot set target flags on target-independent globals");
+  if (Alignment == 0)
+    Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(Offset);
+  C->AddSelectionDAGCSEId(ID);
+  ID.AddInteger(TargetFlags);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(MBB);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
+  new (N) BasicBlockSDNode(MBB);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(EVT VT) {
+  if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+      ValueTypeNodes.size())
+    ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
+
+  SDNode *&N = VT.isExtended() ?
+    ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+  if (N) return SDValue(N, 0);
+  N = NodeAllocator.Allocate<VTSDNode>();
+  new (N) VTSDNode(VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+  SDNode *&N = ExternalSymbols[Sym];
+  if (N) return SDValue(N, 0);
+  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+  new (N) ExternalSymbolSDNode(false, Sym, 0, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+                                              unsigned char TargetFlags) {
+  SDNode *&N =
+    TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+                                                               TargetFlags)];
+  if (N) return SDValue(N, 0);
+  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+  new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+  if ((unsigned)Cond >= CondCodeNodes.size())
+    CondCodeNodes.resize(Cond+1);
+
+  if (CondCodeNodes[Cond] == 0) {
+    CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>();
+    new (N) CondCodeSDNode(Cond);
+    CondCodeNodes[Cond] = N;
+    AllNodes.push_back(N);
+  }
+
+  return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in
+// the shuffle mask M that point at N1 to point at N2, and indices that point
+// at N2 to point at N1.
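+// For example, with 4-element inputs the mask <0,5,2,7> becomes <4,1,6,3>.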
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+  std::swap(N1, N2);
+  int NElts = M.size();
+  for (int i = 0; i != NElts; ++i) {
+    if (M[i] >= NElts)
+      M[i] -= NElts;
+    else if (M[i] >= 0)
+      M[i] += NElts;
+  }
+}
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
+                                       SDValue N2, const int *Mask) {
+  assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+  assert(VT.isVector() && N1.getValueType().isVector() &&
+         "Vector Shuffle VTs must be vectors");
+  assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+         && "Vector Shuffle VTs must have same element type");
+
+  // Canonicalize shuffle undef, undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+    return getUNDEF(VT);
+
+  // Validate that all indices in Mask are within the range of the elements
+  // input to the shuffle.
+  unsigned NElts = VT.getVectorNumElements();
+  SmallVector<int, 8> MaskVec;
+  for (unsigned i = 0; i != NElts; ++i) {
+    assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+    MaskVec.push_back(Mask[i]);
+  }
+
+  // Canonicalize shuffle v, v -> v, undef
+  if (N1 == N2) {
+    N2 = getUNDEF(VT);
+    for (unsigned i = 0; i != NElts; ++i)
+      if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+  }
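+  // E.g. with 4 elements, shuffle v,v,<0,4,1,5> becomes
+  // shuffle v,undef,<0,0,1,1>.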
+
+  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
+  if (N1.getOpcode() == ISD::UNDEF)
+    commuteShuffle(N1, N2, MaskVec);
+
+  // Canonicalize "all indices into lhs" -> shuffle lhs, undef
+  // Canonicalize "all indices into rhs" -> shuffle rhs, undef
+  bool AllLHS = true, AllRHS = true;
+  bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+  for (unsigned i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= (int)NElts) {
+      if (N2Undef)
+        MaskVec[i] = -1;
+      else
+        AllLHS = false;
+    } else if (MaskVec[i] >= 0) {
+      AllRHS = false;
+    }
+  }
+  if (AllLHS && AllRHS)
+    return getUNDEF(VT);
+  if (AllLHS && !N2Undef)
+    N2 = getUNDEF(VT);
+  if (AllRHS) {
+    N1 = getUNDEF(VT);
+    commuteShuffle(N1, N2, MaskVec);
+  }
+
+  // If this is an identity shuffle, or if every element shuffles to undef,
+  // return the appropriate node directly.
+  bool AllUndef = true;
+  bool Identity = true;
+  for (unsigned i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+    if (MaskVec[i] >= 0) AllUndef = false;
+  }
+  if (Identity && NElts == N1.getValueType().getVectorNumElements())
+    return N1;
+  if (AllUndef)
+    return getUNDEF(VT);
+
+  FoldingSetNodeID ID;
+  SDValue Ops[2] = { N1, N2 };
+  AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+  for (unsigned i = 0; i != NElts; ++i)
+    ID.AddInteger(MaskVec[i]);
+
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  // Allocate the mask array for the node out of the BumpPtrAllocator, since
+  // SDNode doesn't have access to it.  This memory will be "leaked" when
+  // the node is deallocated, but recovered when the OperandAllocator is released.
+  int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+  memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+  ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
+  new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
+                                       SDValue Val, SDValue DTy,
+                                       SDValue STy, SDValue Rnd, SDValue Sat,
+                                       ISD::CvtCode Code) {
+  // If the src and dest types are the same and the conversion is between
+  // integer types of the same sign or two floats, no conversion is necessary.
+  if (DTy == STy &&
+      (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+    return Val;
+
+  FoldingSetNodeID ID;
+  SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+  AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
+  new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+  ID.AddInteger(RegNo);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
+  new (N) RegisterSDNode(RegNo, VT);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
+                               SDValue Root,
+                               unsigned LabelID) {
+  FoldingSetNodeID ID;
+  SDValue Ops[] = { Root };
+  AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
+  ID.AddInteger(LabelID);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
+  new (N) LabelSDNode(Opcode, dl, Root, LabelID);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
+                                      bool isTarget,
+                                      unsigned char TargetFlags) {
+  unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddPointer(BA);
+  ID.AddInteger(TargetFlags);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>();
+  new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+  assert((!V || isa<PointerType>(V->getType())) &&
+         "SrcValue is not a pointer?");
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(V);
+
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
+  new (N) SrcValueSDNode(V);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+/// getShiftAmountOperand - Return the specified value casted to
+/// the target's desired shift amount type.
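+/// For example, if the target's shift amount type is i8, an i32 count is
+/// truncated to i8, and a narrower count is zero-extended to i8.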
+SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
+  EVT OpTy = Op.getValueType();
+  MVT ShTy = TLI.getShiftAmountTy();
+  if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+  ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ?  ISD::TRUNCATE : ISD::ZERO_EXTEND;
+  return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+  MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+  unsigned ByteSize = VT.getStoreSize();
+  const Type *Ty = VT.getTypeForEVT(*getContext());
+  unsigned StackAlign =
+  std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+
+  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+  return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+/// CreateStackTemporary - Create a stack temporary suitable for holding
+/// either of the specified value types.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
+  unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
+                            VT2.getStoreSizeInBits())/8;
+  const Type *Ty1 = VT1.getTypeForEVT(*getContext());
+  const Type *Ty2 = VT2.getTypeForEVT(*getContext());
+  const TargetData *TD = TLI.getTargetData();
+  unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
+                            TD->getPrefTypeAlignment(Ty2));
+
+  MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+  int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
+  return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
+                                SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
+  // These setcc operations always fold.
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return getConstant(1, VT);
+
+  case ISD::SETOEQ:
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETONE:
+  case ISD::SETO:
+  case ISD::SETUO:
+  case ISD::SETUEQ:
+  case ISD::SETUNE:
+    assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+    break;
+  }
+
+  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+    const APInt &C2 = N2C->getAPIntValue();
+    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+      const APInt &C1 = N1C->getAPIntValue();
+
+      switch (Cond) {
+      default: llvm_unreachable("Unknown integer setcc!");
+      case ISD::SETEQ:  return getConstant(C1 == C2, VT);
+      case ISD::SETNE:  return getConstant(C1 != C2, VT);
+      case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+      case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+      case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+      case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+      case ISD::SETLT:  return getConstant(C1.slt(C2), VT);
+      case ISD::SETGT:  return getConstant(C1.sgt(C2), VT);
+      case ISD::SETLE:  return getConstant(C1.sle(C2), VT);
+      case ISD::SETGE:  return getConstant(C1.sge(C2), VT);
+      }
+    }
+  }
+  if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+    if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+      // No compile time operations on this type yet.
+      if (N1C->getValueType(0) == MVT::ppcf128)
+        return SDValue();
+
+      APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+      switch (Cond) {
+      default: break;
+      case ISD::SETEQ:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+      case ISD::SETNE:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+                                           R==APFloat::cmpLessThan, VT);
+      case ISD::SETLT:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+      case ISD::SETGT:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+      case ISD::SETLE:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+                                           R==APFloat::cmpEqual, VT);
+      case ISD::SETGE:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+                                           R==APFloat::cmpEqual, VT);
+      case ISD::SETO:   return getConstant(R!=APFloat::cmpUnordered, VT);
+      case ISD::SETUO:  return getConstant(R==APFloat::cmpUnordered, VT);
+      case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+                                           R==APFloat::cmpEqual, VT);
+      case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+      case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+                                           R==APFloat::cmpLessThan, VT);
+      case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+                                           R==APFloat::cmpUnordered, VT);
+      case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
+      case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+      }
+    } else {
+      // Ensure that the constant occurs on the RHS.
+      return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+    }
+  }
+
+  // Could not fold it.
+  return SDValue();
+}
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero.  We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+  // This predicate is not safe for vector operations.
+  if (Op.getValueType().isVector())
+    return false;
+
+  unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+  return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
+/// this predicate to simplify operations downstream.  Mask is known to be zero
+/// for bits that V cannot have.
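+/// For example, if Op is (and X, 255) and Mask covers only bits above the
+/// low 8, this returns true: all of those bits are known zero.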
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+                                     unsigned Depth) const {
+  APInt KnownZero, KnownOne;
+  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+  return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets.  This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
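+/// For example, given (or X, 1), KnownOne gets bit 0 set; given
+/// (and X, 0xF0), KnownZero covers every demanded bit outside 0xF0.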
+void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
+                                     APInt &KnownZero, APInt &KnownOne,
+                                     unsigned Depth) const {
+  unsigned BitWidth = Mask.getBitWidth();
+  assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
+         "Mask size mismatches value type size!");
+
+  KnownZero = KnownOne = APInt(BitWidth, 0);   // Don't know anything.
+  if (Depth == 6 || Mask == 0)
+    return;  // Limit search depth.
+
+  APInt KnownZero2, KnownOne2;
+
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
+    KnownZero = ~KnownOne & Mask;
+    return;
+  case ISD::AND:
+    // If either the LHS or the RHS are Zero, the result is zero.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
+                      KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 bits are clear if zero in either the LHS or RHS.
+    KnownZero |= KnownZero2;
+    return;
+  case ISD::OR:
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
+                      KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 bits are set if set in either the LHS or RHS.
+    KnownOne |= KnownOne2;
+    return;
+  case ISD::XOR: {
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are set if set in exactly one of the LHS, RHS.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case ISD::MUL: {
+    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+    // More trickiness is possible, but this is sufficient for the
+    // interesting case of alignment computation.
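+    // For example, if both operands are multiples of 4 (two trailing zero
+    // bits each), the product is a multiple of 16, so its low four bits
+    // are known zero.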
+    KnownOne.clear();
+    unsigned TrailZ = KnownZero.countTrailingOnes() +
+                      KnownZero2.countTrailingOnes();
+    unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
+                               KnownZero2.countLeadingOnes(),
+                               BitWidth) - BitWidth;
+
+    TrailZ = std::min(TrailZ, BitWidth);
+    LeadZ = std::min(LeadZ, BitWidth);
+    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+                APInt::getHighBitsSet(BitWidth, LeadZ);
+    KnownZero &= Mask;
+    return;
+  }
+  case ISD::UDIV: {
+    // For the purposes of computing leading zeros we can conservatively
+    // treat a udiv as a logical right shift by the power of 2 known to
+    // be no greater than the denominator.
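+    // For example, an i32 numerator with 8 known leading zero bits divided
+    // by a denominator with bit 4 known set yields at least 12 leading
+    // zero bits in the result.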
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(0),
+                      AllOnes, KnownZero2, KnownOne2, Depth+1);
+    unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+    KnownOne2.clear();
+    KnownZero2.clear();
+    ComputeMaskedBits(Op.getOperand(1),
+                      AllOnes, KnownZero2, KnownOne2, Depth+1);
+    unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+    if (RHSUnknownLeadingOnes != BitWidth)
+      LeadZ = std::min(BitWidth,
+                       LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+    KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+    return;
+  }
+  case ISD::SELECT:
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SELECT_CC:
+    ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+    if (Op.getResNo() != 1)
+      return;
+    // The boolean result conforms to getBooleanContents.  Fall through.
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
+    if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
+        BitWidth > 1)
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    return;
+  case ISD::SHL:
+    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        return;
+
+      ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero <<= ShAmt;
+      KnownOne  <<= ShAmt;
+      // low bits known zero.
+      KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+    }
+    return;
+  case ISD::SRL:
+    // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        return;
+
+      ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero = KnownZero.lshr(ShAmt);
+      KnownOne  = KnownOne.lshr(ShAmt);
+
+      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    return;
+  case ISD::SRA:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        return;
+
+      APInt InDemandedMask = (Mask << ShAmt);
+      // If any of the demanded bits are produced by the sign extension, we also
+      // demand the input sign bit.
+      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+      if (HighBits.getBoolValue())
+        InDemandedMask |= APInt::getSignBit(BitWidth);
+
+      ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+                        Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero = KnownZero.lshr(ShAmt);
+      KnownOne  = KnownOne.lshr(ShAmt);
+
+      // Handle the sign bits.
+      APInt SignBit = APInt::getSignBit(BitWidth);
+      SignBit = SignBit.lshr(ShAmt);  // Adjust to where it is now in the mask.
+
+      if (KnownZero.intersects(SignBit)) {
+        KnownZero |= HighBits;  // New bits are known zero.
+      } else if (KnownOne.intersects(SignBit)) {
+        KnownOne  |= HighBits;  // New bits are known one.
+      }
+    }
+    return;
+  case ISD::SIGN_EXTEND_INREG: {
+    EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    unsigned EBits = EVT.getScalarType().getSizeInBits();
+
+    // Sign extension.  Compute the demanded bits in the result that are not
+    // present in the input.
+    APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+
+    APInt InSignBit = APInt::getSignBit(EBits);
+    APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+
+    // If the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    InSignBit.zext(BitWidth);
+    if (NewBits.getBoolValue())
+      InputDemandedBits |= InSignBit;
+
+    ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+                      KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+    if (KnownZero.intersects(InSignBit)) {         // Input sign bit known clear
+      KnownZero |= NewBits;
+      KnownOne  &= ~NewBits;
+    } else if (KnownOne.intersects(InSignBit)) {   // Input sign bit known set
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                              // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne  &= ~NewBits;
+    }
+    return;
+  }
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP: {
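+    // The result of a count is at most BitWidth, so only the low
+    // Log2_32(BitWidth)+1 bits can ever be set; everything above them is
+    // known zero.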
+    unsigned LowBits = Log2_32(BitWidth)+1;
+    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+    KnownOne.clear();
+    return;
+  }
+  case ISD::LOAD: {
+    if (ISD::isZEXTLoad(Op.getNode())) {
+      LoadSDNode *LD = cast<LoadSDNode>(Op);
+      EVT VT = LD->getMemoryVT();
+      unsigned MemBits = VT.getScalarType().getSizeInBits();
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+    }
+    return;
+  }
+  case ISD::ZERO_EXTEND: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
+    APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+    APInt InMask    = Mask;
+    InMask.trunc(InBits);
+    KnownZero.trunc(InBits);
+    KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    KnownZero |= NewBits;
+    return;
+  }
+  case ISD::SIGN_EXTEND: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
+    APInt InSignBit = APInt::getSignBit(InBits);
+    APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+    APInt InMask = Mask;
+    InMask.trunc(InBits);
+
+    // If any of the sign extended bits are demanded, we know that the sign
+    // bit is demanded. Temporarily set this bit in the mask for our callee.
+    if (NewBits.getBoolValue())
+      InMask |= InSignBit;
+
+    KnownZero.trunc(InBits);
+    KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+
+    // Note if the sign bit is known to be zero or one.
+    bool SignBitKnownZero = KnownZero.isNegative();
+    bool SignBitKnownOne  = KnownOne.isNegative();
+    assert(!(SignBitKnownZero && SignBitKnownOne) &&
+           "Sign bit can't be known to be both zero and one!");
+
+    // If the sign bit wasn't actually demanded by our caller, we don't
+    // want it set in the KnownZero and KnownOne result values. Reset the
+    // mask and reapply it to the result values.
+    InMask = Mask;
+    InMask.trunc(InBits);
+    KnownZero &= InMask;
+    KnownOne  &= InMask;
+
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+
+    // If the sign bit is known zero or one, the top bits match.
+    if (SignBitKnownZero)
+      KnownZero |= NewBits;
+    else if (SignBitKnownOne)
+      KnownOne  |= NewBits;
+    return;
+  }
+  case ISD::ANY_EXTEND: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
+    APInt InMask = Mask;
+    InMask.trunc(InBits);
+    KnownZero.trunc(InBits);
+    KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    return;
+  }
+  case ISD::TRUNCATE: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
+    APInt InMask = Mask;
+    InMask.zext(InBits);
+    KnownZero.zext(InBits);
+    KnownOne.zext(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    KnownZero.trunc(BitWidth);
+    KnownOne.trunc(BitWidth);
+    break;
+  }
+  case ISD::AssertZext: {
+    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+                      KnownOne, Depth+1);
+    KnownZero |= (~InMask) & Mask;
+    return;
+  }
+  case ISD::FGETSIGN:
+    // All bits are zero except the low bit.
+    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    return;
+
+  case ISD::SUB: {
+    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+      // We know that the top bits of C-X are clear if X contains fewer bits
+      // than C (i.e. no wrap-around can happen).  For example, 20-X is
+      // positive if we can prove that X is >= 0 and < 16.
+      if (CLHS->getAPIntValue().isNonNegative()) {
+        unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+        // NLZ can't be BitWidth with no sign bit
+        APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+        ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+                          Depth+1);
+
+        // If all of the MaskV bits are known to be zero, then we know the
+        // output top bits are zero, because we now know that the output is
+        // from [0-C].
+        if ((KnownZero2 & MaskV) == MaskV) {
+          unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+          // Top bits known zero.
+          KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+        }
+      }
+    }
+  }
+  // fall through
+  case ISD::ADD: {
+    // The output's low bits are known zero in the low clear bits common to
+    // both the LHS and RHS.  For example, 8+(X<<3) is known to have the
+    // low 3 bits clear.
+    APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
+    ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+    ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    KnownZeroOut = std::min(KnownZeroOut,
+                            KnownZero2.countTrailingOnes());
+
+    KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+    return;
+  }
+  case ISD::SREM:
+    if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      const APInt &RA = Rem->getAPIntValue().abs();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = RA - 1;
+        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+        ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2,
+                          Depth+1);
+
+        // The low bits of the first operand are unchanged by the srem.
+        KnownZero = KnownZero2 & LowBits;
+        KnownOne = KnownOne2 & LowBits;
+
+        // If the first operand is non-negative or has all low bits zero, then
+        // the upper bits are all zero.
+        if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+          KnownZero |= ~LowBits;
+
+        // If the first operand is negative and not all low bits are zero, then
+        // the upper bits are all one.
+        if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+          KnownOne |= ~LowBits;
+
+        KnownZero &= Mask;
+        KnownOne &= Mask;
+
+        assert((KnownZero & KnownOne) == 0 &&
+               "Bits known to be one AND zero?");
+      }
+    }
+    return;
+  case ISD::UREM: {
+    if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      const APInt &RA = Rem->getAPIntValue();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = (RA - 1);
+        APInt Mask2 = LowBits & Mask;
+        KnownZero |= ~LowBits & Mask;
+        ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,
+                          Depth+1);
+        assert((KnownZero & KnownOne) == 0 &&
+               "Bits known to be one AND zero?");
+        break;
+      }
+    }
+
+    // Since the result is less than or equal to either operand, any leading
+    // zero bits in either operand must also exist in the result.
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
+                      Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
+                      Depth+1);
+
+    uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+                                KnownZero2.countLeadingOnes());
+    KnownOne.clear();
+    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+    return;
+  }
+  default:
+    // Allow the target to implement this method for its nodes.
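+    // The case labels below jump directly into the body of this 'if', so
+    // intrinsics reach the target hook even though their opcodes are
+    // numerically below ISD::BUILTIN_OP_END.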
+    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_VOID:
+      TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
+                                         Depth);
+    }
+    return;
+  }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits.  We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information.  For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
+  EVT VT = Op.getValueType();
+  assert(VT.isInteger() && "Invalid VT!");
+  unsigned VTBits = VT.getScalarType().getSizeInBits();
+  unsigned Tmp, Tmp2;
+  unsigned FirstAnswer = 1;
+
+  if (Depth == 6)
+    return 1;  // Limit search depth.
+
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::AssertSext:
+    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+    return VTBits-Tmp+1;
+  case ISD::AssertZext:
+    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+    return VTBits-Tmp;
+
+  case ISD::Constant: {
+    const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+    // If negative, return # leading ones.
+    if (Val.isNegative())
+      return Val.countLeadingOnes();
+
+    // Return # leading zeros.
+    return Val.countLeadingZeros();
+  }
+
+  case ISD::SIGN_EXTEND:
+    Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+    return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+  case ISD::SIGN_EXTEND_INREG:
+    // Max of the input and what this extends.
+    Tmp =
+      cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+    Tmp = VTBits-Tmp+1;
+
+    Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    return std::max(Tmp, Tmp2);
+
+  case ISD::SRA:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    // SRA X, C   -> adds C sign bits.
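+    // For example, a value with 3 sign bits shifted right arithmetically
+    // by 2 has 5 sign bits, capped at the bit width.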
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      Tmp += C->getZExtValue();
+      if (Tmp > VTBits) Tmp = VTBits;
+    }
+    return Tmp;
+  case ISD::SHL:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      // shl destroys sign bits.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (C->getZExtValue() >= VTBits ||      // Bad shift.
+          C->getZExtValue() >= Tmp) break;    // Shifted all sign bits out.
+      return Tmp - C->getZExtValue();
+    }
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:    // NOT is handled here.
+    // Logical binary ops preserve the number of sign bits at the worst.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp != 1) {
+      Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+      FirstAnswer = std::min(Tmp, Tmp2);
+      // We computed what we know about the sign bits as our first
+      // answer. Now proceed to the generic code that uses
+      // ComputeMaskedBits, and pick whichever answer is better.
+    }
+    break;
+
+  case ISD::SELECT:
+    Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+    return std::min(Tmp, Tmp2);
+
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+    if (Op.getResNo() != 1)
+      break;
+    // The boolean result conforms to getBooleanContents.  Fall through.
+  case ISD::SETCC:
+    // If setcc returns 0/-1, all bits are sign bits.
+    if (TLI.getBooleanContents() ==
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+      return VTBits;
+    break;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+      // Handle rotate right by N like a rotate left by 32-N.
+      if (Op.getOpcode() == ISD::ROTR)
+        RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+      // If we aren't rotating out all of the known-in sign bits, return the
+      // number that are left.  This handles rotl(sext(x), 1) for example.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (Tmp > RotAmt+1) return Tmp-RotAmt;
+    }
+    break;
+  case ISD::ADD:
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
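+    // For example, adding two values that each have at least 10 sign bits
+    // yields a result with at least min(10,10)-1 = 9 sign bits.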
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+
+    // Special case decrementing a value (ADD X, -1):
+    if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+      if (CRHS->isAllOnesValue()) {
+        APInt KnownZero, KnownOne;
+        APInt Mask = APInt::getAllOnesValue(VTBits);
+        ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero | APInt(VTBits, 1)) == Mask)
+          return VTBits;
+
+        // If we are subtracting one from a positive number, there is no carry
+        // out of the result.
+        if (KnownZero.isNegative())
+          return Tmp;
+      }
+
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+    return std::min(Tmp, Tmp2)-1;
+
+  case ISD::SUB:
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+
+    // Handle NEG.
+    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+      if (CLHS->isNullValue()) {
+        APInt KnownZero, KnownOne;
+        APInt Mask = APInt::getAllOnesValue(VTBits);
+        ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero | APInt(VTBits, 1)) == Mask)
+          return VTBits;
+
+        // If the input is known to be positive (the sign bit is known clear),
+        // the output of the NEG has the same number of sign bits as the input.
+        if (KnownZero.isNegative())
+          return Tmp2;
+
+        // Otherwise, we treat this like a SUB.
+      }
+
+    // Sub can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    return std::min(Tmp, Tmp2)-1;
+  case ISD::TRUNCATE:
+    // FIXME: it's tricky to do anything useful for this, but it is an important
+    // case for targets like X86.
+    break;
+  }
+
+  // Handle LOADX separately here. EXTLOAD case will fall through.
+  if (Op.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LD = cast<LoadSDNode>(Op);
+    unsigned ExtType = LD->getExtensionType();
+    switch (ExtType) {
+    default: break;
+    case ISD::SEXTLOAD:    // '17' bits known
+      Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+      return VTBits-Tmp+1;
+    case ISD::ZEXTLOAD:    // '16' bits known
+      Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+      return VTBits-Tmp;
+    }
+  }
+
+  // Allow the target to implement this method for its nodes.
+  if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+      Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+      Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+      Op.getOpcode() == ISD::INTRINSIC_VOID) {
+    unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+    if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+  }
+
+  // Finally, if we can prove that the top bits of the result are 0's or 1's,
+  // use this information.
+  APInt KnownZero, KnownOne;
+  APInt Mask = APInt::getAllOnesValue(VTBits);
+  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+  if (KnownZero.isNegative()) {        // sign bit is 0
+    Mask = KnownZero;
+  } else if (KnownOne.isNegative()) {  // sign bit is 1;
+    Mask = KnownOne;
+  } else {
+    // Nothing known.
+    return FirstAnswer;
+  }
+
+  // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
+  // the number of identical bits in the top of the input value.
+  Mask = ~Mask;
+  Mask <<= Mask.getBitWidth()-VTBits;
+  // Return # leading zeros.  We use 'min' here in case Val was zero before
+  // shifting.  We don't want to return '64' as for an i32 "0".
+  return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+  // If we're told that NaNs won't happen, assume they won't.
+  if (FiniteOnlyFPMath())
+    return true;
+
+  // If the value is a constant, we can obviously see if it is a NaN or not.
+  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+    return !C->getValueAPF().isNaN();
+
+  // TODO: Recognize more cases here.
+
+  return false;
+}
+
+bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
+  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+  if (!GA) return false;
+  if (GA->getOffset() != 0) return false;
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+  if (!GV) return false;
+  MachineModuleInfo *MMI = getMachineModuleInfo();
+  return MMI && MMI->hasDebugInfo();
+}
+
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
+                                          unsigned i) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  if (N->getMaskElt(i) < 0)
+    return getUNDEF(VT.getVectorElementType());
+  unsigned Index = N->getMaskElt(i);
+  unsigned NumElems = VT.getVectorNumElements();
+  SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+  Index %= NumElems;
+
+  if (V.getOpcode() == ISD::BIT_CONVERT) {
+    V = V.getOperand(0);
+    EVT VVT = V.getValueType();
+    if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
+      return SDValue();
+  }
+  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    return (Index == 0) ? V.getOperand(0)
+                        : getUNDEF(VT.getVectorElementType());
+  if (V.getOpcode() == ISD::BUILD_VECTOR)
+    return V.getOperand(Index);
+  if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V))
+    return getShuffleScalarElt(SVN, Index);
+  return SDValue();
+}
+
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<SDNode>();
+  new (N) SDNode(Opcode, DL, getVTList(VT));
+  CSEMap.InsertNode(N, IP);
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              EVT VT, SDValue Operand) {
+  // Constant fold unary operations with an integer constant operand.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+    const APInt &Val = C->getAPIntValue();
+    unsigned BitWidth = VT.getSizeInBits();
+    switch (Opcode) {
+    default: break;
+    case ISD::SIGN_EXTEND:
+      return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT);
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::TRUNCATE:
+      return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT);
+    case ISD::UINT_TO_FP:
+    case ISD::SINT_TO_FP: {
+      const uint64_t zero[] = {0, 0};
+      // No compile time operations on this type.
+      if (VT==MVT::ppcf128)
+        break;
+      APFloat apf = APFloat(APInt(BitWidth, 2, zero));
+      (void)apf.convertFromAPInt(Val,
+                                 Opcode==ISD::SINT_TO_FP,
+                                 APFloat::rmNearestTiesToEven);
+      return getConstantFP(apf, VT);
+    }
+    case ISD::BIT_CONVERT:
+      if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+        return getConstantFP(Val.bitsToFloat(), VT);
+      else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+        return getConstantFP(Val.bitsToDouble(), VT);
+      break;
+    case ISD::BSWAP:
+      return getConstant(Val.byteSwap(), VT);
+    case ISD::CTPOP:
+      return getConstant(Val.countPopulation(), VT);
+    case ISD::CTLZ:
+      return getConstant(Val.countLeadingZeros(), VT);
+    case ISD::CTTZ:
+      return getConstant(Val.countTrailingZeros(), VT);
+    }
+  }
+
+  // Constant fold unary operations with a floating point constant operand.
+  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+    APFloat V = C->getValueAPF();    // make copy
+    if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
+      switch (Opcode) {
+      case ISD::FNEG:
+        V.changeSign();
+        return getConstantFP(V, VT);
+      case ISD::FABS:
+        V.clearSign();
+        return getConstantFP(V, VT);
+      case ISD::FP_ROUND:
+      case ISD::FP_EXTEND: {
+        bool ignored;
+        // This can return overflow, underflow, or inexact; we don't care.
+        // FIXME need to be more flexible about rounding mode.
+        (void)V.convert(*EVTToAPFloatSemantics(VT),
+                        APFloat::rmNearestTiesToEven, &ignored);
+        return getConstantFP(V, VT);
+      }
+      case ISD::FP_TO_SINT:
+      case ISD::FP_TO_UINT: {
+        integerPart x[2];
+        bool ignored;
+        assert(integerPartWidth >= 64);
+        // FIXME need to be more flexible about rounding mode.
+        APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+                              Opcode==ISD::FP_TO_SINT,
+                              APFloat::rmTowardZero, &ignored);
+        if (s==APFloat::opInvalidOp)     // inexact is OK, in fact usual
+          break;
+        APInt api(VT.getSizeInBits(), 2, x);
+        return getConstant(api, VT);
+      }
+      case ISD::BIT_CONVERT:
+        if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+          return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+        else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+          return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+        break;
+      }
+    }
+  }
+
+  unsigned OpOpcode = Operand.getNode()->getOpcode();
+  switch (Opcode) {
+  case ISD::TokenFactor:
+  case ISD::MERGE_VALUES:
+  case ISD::CONCAT_VECTORS:
+    return Operand;         // Factor, merge or concat of one node?  No need.
+  case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+  case ISD::FP_EXTEND:
+    assert(VT.isFloatingPoint() &&
+           Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+    if (Operand.getValueType() == VT) return Operand;  // noop conversion.
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (Operand.getOpcode() == ISD::UNDEF)
+      return getUNDEF(VT);
+    break;
+  case ISD::SIGN_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid SIGN_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid sext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+    break;
+  case ISD::ZERO_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid ZERO_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid zext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::ZERO_EXTEND)   // (zext (zext x)) -> (zext x)
+      return getNode(ISD::ZERO_EXTEND, DL, VT,
+                     Operand.getNode()->getOperand(0));
+    break;
+  case ISD::ANY_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid ANY_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid anyext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+      // (ext (zext x)) -> (zext x)  and  (ext (sext x)) -> (sext x)
+      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+    break;
+  case ISD::TRUNCATE:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid TRUNCATE!");
+    if (Operand.getValueType() == VT) return Operand;   // noop truncate
+    assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+           "Invalid truncate node, src < dst!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::TRUNCATE)
+      return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+    else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+             OpOpcode == ISD::ANY_EXTEND) {
+      // If the source is smaller than the dest, we still need an extend.
+      if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+            .bitsLT(VT.getScalarType()))
+        return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+      else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+        return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+      else
+        return Operand.getNode()->getOperand(0);
+    }
+    break;
+  case ISD::BIT_CONVERT:
+    // Basic sanity checking.
+    assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+           && "Cannot BIT_CONVERT between types of different sizes!");
+    if (VT == Operand.getValueType()) return Operand;  // noop conversion.
+    if (OpOpcode == ISD::BIT_CONVERT)  // bitconv(bitconv(x)) -> bitconv(x)
+      return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    assert(VT.isVector() && !Operand.getValueType().isVector() &&
+           (VT.getVectorElementType() == Operand.getValueType() ||
+            (VT.getVectorElementType().isInteger() &&
+             Operand.getValueType().isInteger() &&
+             VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+           "Illegal SCALAR_TO_VECTOR node!");
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+    // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+    if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+        isa<ConstantSDNode>(Operand.getOperand(1)) &&
+        Operand.getConstantOperandVal(1) == 0 &&
+        Operand.getOperand(0).getValueType() == VT)
+      return Operand.getOperand(0);
+    break;
+  case ISD::FNEG:
+    // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+    if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+      return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+                     Operand.getNode()->getOperand(0));
+    if (OpOpcode == ISD::FNEG)  // --X -> X
+      return Operand.getNode()->getOperand(0);
+    break;
+  case ISD::FABS:
+    if (OpOpcode == ISD::FNEG)  // abs(-X) -> abs(X)
+      return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+    break;
+  }
+
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+    FoldingSetNodeID ID;
+    SDValue Ops[1] = { Operand };
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = NodeAllocator.Allocate<UnarySDNode>();
+    new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<UnarySDNode>();
+    new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
+                                             EVT VT,
+                                             ConstantSDNode *Cst1,
+                                             ConstantSDNode *Cst2) {
+  const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+
+  switch (Opcode) {
+  case ISD::ADD:  return getConstant(C1 + C2, VT);
+  case ISD::SUB:  return getConstant(C1 - C2, VT);
+  case ISD::MUL:  return getConstant(C1 * C2, VT);
+  case ISD::UDIV:
+    if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
+    break;
+  case ISD::UREM:
+    if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
+    break;
+  case ISD::SDIV:
+    if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
+    break;
+  case ISD::SREM:
+    if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
+    break;
+  case ISD::AND:  return getConstant(C1 & C2, VT);
+  case ISD::OR:   return getConstant(C1 | C2, VT);
+  case ISD::XOR:  return getConstant(C1 ^ C2, VT);
+  case ISD::SHL:  return getConstant(C1 << C2, VT);
+  case ISD::SRL:  return getConstant(C1.lshr(C2), VT);
+  case ISD::SRA:  return getConstant(C1.ashr(C2), VT);
+  case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
+  case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
+  default: break;
+  }
+
+  return SDValue();
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              SDValue N1, SDValue N2) {
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  switch (Opcode) {
+  default: break;
+  case ISD::TokenFactor:
+    assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+           N2.getValueType() == MVT::Other && "Invalid token factor!");
+    // Fold trivial token factors.
+    if (N1.getOpcode() == ISD::EntryToken) return N2;
+    if (N2.getOpcode() == ISD::EntryToken) return N1;
+    if (N1 == N2) return N1;
+    break;
+  case ISD::CONCAT_VECTORS:
+    // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+    // one big BUILD_VECTOR.
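+    // For example, concatenating two v2i32 BUILD_VECTORs yields a single
+    // v4i32 BUILD_VECTOR of the four scalar operands.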
+    if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+        N2.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+                                    N1.getNode()->op_end());
+      Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+      return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+    }
+    break;
+  case ISD::AND:
+    assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    // (X & 0) -> 0.  This commonly occurs when legalizing i64 values, so it's
+    // worth handling here.
+    if (N2C && N2C->isNullValue())
+      return N2;
+    if (N2C && N2C->isAllOnesValue())  // X & -1 -> X
+      return N1;
+    break;
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+    assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    // (X ^|+- 0) -> X.  This commonly occurs when legalizing i64 values, so
+    // it's worth handling here.
+    if (N2C && N2C->isNullValue())
+      return N1;
+    break;
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::MULHU:
+  case ISD::MULHS:
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::SREM:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    // fall through
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+    if (UnsafeFPMath) {
+      if (Opcode == ISD::FADD) {
+        // 0+x --> x
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+          if (CFP->getValueAPF().isZero())
+            return N2;
+        // x+0 --> x
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+          if (CFP->getValueAPF().isZero())
+            return N1;
+      } else if (Opcode == ISD::FSUB) {
+        // x-0 --> x
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+          if (CFP->getValueAPF().isZero())
+            return N1;
+      }
+    }
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    break;
+  case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.
+    assert(N1.getValueType() == VT &&
+           N1.getValueType().isFloatingPoint() &&
+           N2.getValueType().isFloatingPoint() &&
+           "Invalid FCOPYSIGN!");
+    break;
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:
+    assert(VT == N1.getValueType() &&
+           "Shift operators return type must be the same as their first arg");
+    assert(VT.isInteger() && N2.getValueType().isInteger() &&
+           "Shifts only work on integers");
+
+    // Always fold shifts of i1 values so the code generator doesn't need to
+    // handle them.  Since we know the size of the shift has to be less than the
+    // size of the value, the shift/rotate count is guaranteed to be zero.
+    if (VT == MVT::i1)
+      return N1;
+    if (N2C && N2C->isNullValue())
+      return N1;
+    break;
+  case ISD::FP_ROUND_INREG: {
+    EVT EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg round!");
+    assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+           "Cannot FP_ROUND_INREG integer types");
+    assert(EVT.isVector() == VT.isVector() &&
+           "FP_ROUND_INREG type should be vector iff the operand "
+           "type is vector!");
+    assert((!EVT.isVector() ||
+            EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+           "Vector element counts must match in FP_ROUND_INREG");
+    assert(EVT.bitsLE(VT) && "Not rounding down!");
+    if (cast<VTSDNode>(N2)->getVT() == VT) return N1;  // Not actually rounding.
+    break;
+  }
+  case ISD::FP_ROUND:
+    assert(VT.isFloatingPoint() &&
+           N1.getValueType().isFloatingPoint() &&
+           VT.bitsLE(N1.getValueType()) &&
+           isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+    if (N1.getValueType() == VT) return N1;  // noop conversion.
+    break;
+  case ISD::AssertSext:
+  case ISD::AssertZext: {
+    EVT ExtVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg extend!");
+    assert(VT.isInteger() && ExtVT.isInteger() &&
+           "Cannot *_EXTEND_INREG FP types");
+    assert(!ExtVT.isVector() &&
+           "AssertSExt/AssertZExt type should be the vector element type "
+           "rather than the vector type!");
+    assert(ExtVT.bitsLE(VT) && "Not extending!");
+    if (VT == ExtVT) return N1; // noop assertion.
+    break;
+  }
+  case ISD::SIGN_EXTEND_INREG: {
+    EVT ExtVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg extend!");
+    assert(VT.isInteger() && ExtVT.isInteger() &&
+           "Cannot *_EXTEND_INREG FP types");
+    assert(ExtVT.isVector() == VT.isVector() &&
+           "SIGN_EXTEND_INREG type should be vector iff the operand "
+           "type is vector!");
+    assert((!ExtVT.isVector() ||
+            ExtVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+           "Vector element counts must match in SIGN_EXTEND_INREG");
+    assert(ExtVT.bitsLE(VT) && "Not extending!");
+    if (ExtVT == VT) return N1;  // Not actually extending.
+
+    if (N1C) {
+      APInt Val = N1C->getAPIntValue();
+      unsigned FromBits = ExtVT.getScalarType().getSizeInBits();
+      Val <<= Val.getBitWidth()-FromBits;
+      Val = Val.ashr(Val.getBitWidth()-FromBits);
+      return getConstant(Val, VT);
+    }
+    }
+    break;
+  }
+  case ISD::EXTRACT_VECTOR_ELT:
+    // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+    if (N1.getOpcode() == ISD::UNDEF)
+      return getUNDEF(VT);
+
+    // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+    // expanding copies of large vectors from registers.
+    if (N2C &&
+        N1.getOpcode() == ISD::CONCAT_VECTORS &&
+        N1.getNumOperands() > 0) {
+      unsigned Factor =
+        N1.getOperand(0).getValueType().getVectorNumElements();
+      return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                     N1.getOperand(N2C->getZExtValue() / Factor),
+                     getConstant(N2C->getZExtValue() % Factor,
+                                 N2.getValueType()));
+    }
+
+    // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+    // expanding large vector constants.
+    if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+      SDValue Elt = N1.getOperand(N2C->getZExtValue());
+      EVT VEltTy = N1.getValueType().getVectorElementType();
+      if (Elt.getValueType() != VEltTy) {
+        // If the vector element type is not legal, the BUILD_VECTOR operands
+        // are promoted and implicitly truncated.  Make that explicit here.
+        Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt);
+      }
+      if (VT != VEltTy) {
+        // If the vector element type is not legal, the EXTRACT_VECTOR_ELT
+        // result is implicitly extended.
+        Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt);
+      }
+      return Elt;
+    }
+
+    // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+    // operations are lowered to scalars.
+    if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      // If the indices are the same, return the inserted element;
+      // otherwise, if the indices are known to be different, extract the
+      // element from the original vector.
+      if (N1.getOperand(2) == N2) {
+        if (VT == N1.getOperand(1).getValueType())
+          return N1.getOperand(1);
+        else
+          return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+      } else if (isa<ConstantSDNode>(N1.getOperand(2)) &&
+                 isa<ConstantSDNode>(N2))
+        return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+    }
+    break;
+  case ISD::EXTRACT_ELEMENT:
+    assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+    assert(!N1.getValueType().isVector() && !VT.isVector() &&
+           (N1.getValueType().isInteger() == VT.isInteger()) &&
+           "Wrong types for EXTRACT_ELEMENT!");
+
+    // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+    // 64-bit integers into 32-bit parts.  Instead of building the extract of
+    // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+    if (N1.getOpcode() == ISD::BUILD_PAIR)
+      return N1.getOperand(N2C->getZExtValue());
+
+    // EXTRACT_ELEMENT of a constant int is also very common.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      unsigned ElementSize = VT.getSizeInBits();
+      unsigned Shift = ElementSize * N2C->getZExtValue();
+      APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+      return getConstant(ShiftedVal.trunc(ElementSize), VT);
+    }
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    if (N1.getValueType() == VT) // Trivial extraction.
+      return N1;
+    break;
+  }
+
+  if (N1C) {
+    if (N2C) {
+      SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+      if (SV.getNode()) return SV;
+    } else {      // Canonicalize constant to RHS if commutative
+      if (isCommutativeBinOp(Opcode)) {
+        std::swap(N1C, N2C);
+        std::swap(N1, N2);
+      }
+    }
+  }
+
+  // Constant fold FP operations.
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+  if (N1CFP) {
+    if (!N2CFP && isCommutativeBinOp(Opcode)) {
+      // Canonicalize constant to RHS if commutative
+      std::swap(N1CFP, N2CFP);
+      std::swap(N1, N2);
+    } else if (N2CFP && VT != MVT::ppcf128) {
+      APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+      APFloat::opStatus s;
+      switch (Opcode) {
+      case ISD::FADD:
+        s = V1.add(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FSUB:
+        s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FMUL:
+        s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FDIV:
+        s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FREM :
+        s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FCOPYSIGN:
+        V1.copySign(V2);
+        return getConstantFP(V1, VT);
+      default: break;
+      }
+    }
+  }
+
+  // Canonicalize an UNDEF to the RHS, even over a constant.
+  if (N1.getOpcode() == ISD::UNDEF) {
+    if (isCommutativeBinOp(Opcode)) {
+      std::swap(N1, N2);
+    } else {
+      switch (Opcode) {
+      case ISD::FP_ROUND_INREG:
+      case ISD::SIGN_EXTEND_INREG:
+      case ISD::SUB:
+      case ISD::FSUB:
+      case ISD::FDIV:
+      case ISD::FREM:
+      case ISD::SRA:
+        return N1;     // fold op(undef, arg2) -> undef
+      case ISD::UDIV:
+      case ISD::SDIV:
+      case ISD::UREM:
+      case ISD::SREM:
+      case ISD::SRL:
+      case ISD::SHL:
+        if (!VT.isVector())
+          return getConstant(0, VT);    // fold op(undef, arg2) -> 0
+        // For vectors, we can't easily build an all zero vector, just return
+        // the other operand.
+        return N2;
+      }
+    }
+  }
+
+  // Fold a bunch of operators when the RHS is undef.
+  if (N2.getOpcode() == ISD::UNDEF) {
+    switch (Opcode) {
+    case ISD::XOR:
+      if (N1.getOpcode() == ISD::UNDEF)
+        // Handle undef ^ undef -> 0 special case. This is a common
+        // idiom (misuse).
+        return getConstant(0, VT);
+      // fallthrough
+    case ISD::ADD:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::SUB:
+    case ISD::UDIV:
+    case ISD::SDIV:
+    case ISD::UREM:
+    case ISD::SREM:
+      return N2;       // fold op(arg1, undef) -> undef
+    case ISD::FADD:
+    case ISD::FSUB:
+    case ISD::FMUL:
+    case ISD::FDIV:
+    case ISD::FREM:
+      if (UnsafeFPMath)
+        return N2;
+      break;
+    case ISD::MUL:
+    case ISD::AND:
+    case ISD::SRL:
+    case ISD::SHL:
+      if (!VT.isVector())
+        return getConstant(0, VT);  // fold op(arg1, undef) -> 0
+      // For vectors, we can't easily build an all zero vector, just return
+      // the LHS.
+      return N1;
+    case ISD::OR:
+      if (!VT.isVector())
+        return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+      // For vectors, we can't easily build an all one vector, just return
+      // the LHS.
+      return N1;
+    case ISD::SRA:
+      return N1;
+    }
+  }
+
+  // Memoize this node if possible.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    SDValue Ops[] = { N1, N2 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = NodeAllocator.Allocate<BinarySDNode>();
+    new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<BinarySDNode>();
+    new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
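+// Example (a sketch; assumes a SelectionDAG &DAG, a DebugLoc dl, an integer
+// EVT VT and an SDValue X are in scope): because of the folds above, many
+// calls into this routine never allocate a node at all.
+//   SDValue Zero = DAG.getConstant(0, VT);
+//   DAG.getNode(ISD::ADD, dl, VT, X, Zero);  // folds to X
+//   DAG.getNode(ISD::SHL, dl, VT, X, Zero);  // shift by zero folds to X
+// Calls that don't fold are uniqued through CSEMap, so building the same
+// (Opcode, VT, N1, N2) twice returns the same SDNode.
+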
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              SDValue N1, SDValue N2, SDValue N3) {
+  // Perform various simplifications.
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  switch (Opcode) {
+  case ISD::CONCAT_VECTORS:
+    // A CONCAT_VECTORS node with all BUILD_VECTOR operands can be
+    // simplified to one big BUILD_VECTOR.
+    if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+        N2.getOpcode() == ISD::BUILD_VECTOR &&
+        N3.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+                                    N1.getNode()->op_end());
+      Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+      Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end());
+      return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+    }
+    break;
+  case ISD::SETCC: {
+    // Use FoldSetCC to simplify SETCC's.
+    SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+    if (Simp.getNode()) return Simp;
+    break;
+  }
+  case ISD::SELECT:
+    if (N1C) {
+      if (N1C->getZExtValue())
+        return N2;             // select true, X, Y -> X
+      else
+        return N3;             // select false, X, Y -> Y
+    }
+
+    if (N2 == N3) return N2;   // select C, X, X -> X
+    break;
+  case ISD::BRCOND:
+    if (N2C) {
+      if (N2C->getZExtValue()) // Unconditional branch
+        return getNode(ISD::BR, DL, MVT::Other, N1, N3);
+      else
+        return N1;         // Never-taken branch
+    }
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    llvm_unreachable("should use getVectorShuffle constructor!");
+    break;
+  case ISD::BIT_CONVERT:
+    // Fold bit_convert nodes from a type to themselves.
+    if (N1.getValueType() == VT)
+      return N1;
+    break;
+  }
+
+  // Memoize node if it doesn't produce a flag.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    SDValue Ops[] = { N1, N2, N3 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = NodeAllocator.Allocate<TernarySDNode>();
+    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<TernarySDNode>();
+    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
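+// Example (a sketch; assumes DAG, dl, VT, Chain, Dest, X and Y are in
+// scope): the SELECT and BRCOND folds above let callers build nodes
+// unconditionally and still get trivial control flow simplified.
+//   SDValue True = DAG.getConstant(1, MVT::i1);
+//   DAG.getNode(ISD::SELECT, dl, VT, True, X, Y);             // folds to X
+//   DAG.getNode(ISD::BRCOND, dl, MVT::Other, Chain, True, Dest);
+// The latter is rewritten into an unconditional ISD::BR node.
+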
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              SDValue N1, SDValue N2, SDValue N3,
+                              SDValue N4) {
+  SDValue Ops[] = { N1, N2, N3, N4 };
+  return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              SDValue N1, SDValue N2, SDValue N3,
+                              SDValue N4, SDValue N5) {
+  SDValue Ops[] = { N1, N2, N3, N4, N5 };
+  return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+  SmallVector<SDValue, 8> ArgChains;
+
+  // Include the original chain at the beginning of the list. When this is
+  // used by target LowerCall hooks, this helps legalize find the
+  // CALLSEQ_BEGIN node.
+  ArgChains.push_back(Chain);
+
+  // Add a chain value for each stack argument.
+  for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
+       UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
+    if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+        if (FI->getIndex() < 0)
+          ArgChains.push_back(SDValue(L, 1));
+
+  // Build a tokenfactor for all the chains.
+  return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+                 &ArgChains[0], ArgChains.size());
+}
+
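+// Usage sketch (hypothetical): a target LowerCall hook that lowers a tail
+// call which overwrites the incoming stack area might do
+//   Chain = DAG.getStackArgumentTokenFactor(Chain);
+// before emitting its outgoing argument stores, forcing every load of an
+// incoming stack argument (negative frame index) to complete first.
+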
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+                              DebugLoc dl) {
+  unsigned NumBits = VT.isVector() ?
+    VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+    APInt Val = APInt(NumBits, C->getZExtValue() & 255);
+    unsigned Shift = 8;
+    for (unsigned i = NumBits; i > 8; i >>= 1) {
+      Val = (Val << Shift) | Val;
+      Shift <<= 1;
+    }
+    if (VT.isInteger())
+      return DAG.getConstant(Val, VT);
+    return DAG.getConstantFP(APFloat(Val), VT);
+  }
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Value = DAG.getNode(ISD::OR, dl, VT,
+                        DAG.getNode(ISD::SHL, dl, VT, Value,
+                                    DAG.getConstant(Shift,
+                                                    TLI.getShiftAmountTy())),
+                        Value);
+    Shift <<= 1;
+  }
+
+  return Value;
+}
+
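+// Worked example: splatting the constant byte 0xAB into an i32 walks the
+// doubling loop above as
+//   0x000000AB -> 0x0000ABAB -> 0xABABABAB
+// i.e. each iteration doubles the number of replicated bytes.  The
+// non-constant path builds the same pattern out of SHL and OR nodes.
+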
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string pointer.
+static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
+                                  const TargetLowering &TLI,
+                                  std::string &Str, unsigned Offset) {
+  // Handle vector with all elements zero.
+  if (Str.empty()) {
+    if (VT.isInteger())
+      return DAG.getConstant(0, VT);
+    unsigned NumElts = VT.getVectorNumElements();
+    MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                       DAG.getConstant(0,
+                       EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts)));
+  }
+
+  assert(!VT.isVector() && "Can't handle vector type here!");
+  unsigned NumBits = VT.getSizeInBits();
+  unsigned MSB = NumBits / 8;
+  uint64_t Val = 0;
+  if (TLI.isLittleEndian())
+    Offset = Offset + MSB - 1;
+  for (unsigned i = 0; i != MSB; ++i) {
+    Val = (Val << 8) | (unsigned char)Str[Offset];
+    Offset += TLI.isLittleEndian() ? -1 : 1;
+  }
+  return DAG.getConstant(Val, VT);
+}
+
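+// Worked example: for Str == "abcd" and VT == MVT::i32 on a little-endian
+// target, the loop above reads the bytes back-to-front and produces the
+// constant 0x64636261 ('d','c','b','a'), which lays down the bytes "abcd"
+// in memory order when stored.  On a big-endian target it reads
+// front-to-back and produces 0x61626364 instead.
+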
+/// getMemBasePlusOffset - Returns a node representing the sum of the given
+/// base pointer and a constant offset.
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+                                      SelectionDAG &DAG) {
+  EVT VT = Base.getValueType();
+  return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+                     VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+  unsigned SrcDelta = 0;
+  GlobalAddressSDNode *G = NULL;
+  if (Src.getOpcode() == ISD::GlobalAddress)
+    G = cast<GlobalAddressSDNode>(Src);
+  else if (Src.getOpcode() == ISD::ADD &&
+           Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+           Src.getOperand(1).getOpcode() == ISD::Constant) {
+    G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+    SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+  }
+  if (!G)
+    return false;
+
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+  if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+    return true;
+
+  return false;
+}
+
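+// This matches the two common shapes of a constant-string source.  For
+// example, for memcpy(dst, "hello", 6) the source operand is either
+//   (GlobalAddress @str)                        with SrcDelta == 0, or
+//   (add (GlobalAddress @str), (Constant k))    with SrcDelta == k,
+// and GetConstantStringInfo then copies the string's bytes into Str.
+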
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static
+bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
+                              SDValue Dst, SDValue Src,
+                              unsigned Limit, uint64_t Size, unsigned &Align,
+                              std::string &Str, bool &isSrcStr,
+                              SelectionDAG &DAG,
+                              const TargetLowering &TLI) {
+  isSrcStr = isMemSrcFromString(Src, Str);
+  bool isSrcConst = isa<ConstantSDNode>(Src);
+  EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
+  bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
+  if (VT != MVT::iAny) {
+    const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    // If source is a string constant, this will require an unaligned load.
+    if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
+      if (Dst.getOpcode() != ISD::FrameIndex) {
+        // Can't change destination alignment. It requires an unaligned store.
+        if (AllowUnalign)
+          VT = MVT::iAny;
+      } else {
+        int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
+        MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+        if (MFI->isFixedObjectIndex(FI)) {
+          // Can't change destination alignment. It requires an unaligned
+          // store.
+          if (AllowUnalign)
+            VT = MVT::iAny;
+        } else {
+          // Give the stack frame object a larger alignment if needed.
+          if (MFI->getObjectAlignment(FI) < NewAlign)
+            MFI->setObjectAlignment(FI, NewAlign);
+          Align = NewAlign;
+        }
+      }
+    }
+  }
+
+  if (VT == MVT::iAny) {
+    if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
+      VT = MVT::i64;
+    } else {
+      switch (Align & 7) {
+      case 0:  VT = MVT::i64; break;
+      case 4:  VT = MVT::i32; break;
+      case 2:  VT = MVT::i16; break;
+      default: VT = MVT::i8;  break;
+      }
+    }
+
+    MVT LVT = MVT::i64;
+    while (!TLI.isTypeLegal(LVT))
+      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+    assert(LVT.isInteger());
+
+    if (VT.bitsGT(LVT))
+      VT = LVT;
+  }
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    while (VTSize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
+      if (VT.isVector()) {
+        VT = MVT::i64;
+        while (!TLI.isTypeLegal(VT))
+          VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+        VTSize = VT.getSizeInBits() / 8;
+      } else {
+        // This can result in a type that is not legal on the target, e.g.
+        // 1 or 2 bytes on PPC.
+        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+        VTSize >>= 1;
+      }
+    }
+
+    if (++NumMemOps > Limit)
+      return false;
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
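+// Worked example (assuming getOptimalMemOpType returns MVT::iAny, unaligned
+// i64 accesses are not allowed, and i32 is the widest legal integer type):
+// an 11-byte operation with Align == 4 picks VT = i32 via the (Align & 7)
+// switch, and the shrinking loop then emits
+//   MemOps == { i32, i32, i16, i8 }      // 4 + 4 + 2 + 1 == 11 bytes
+// which meets a Limit of 4 but would be rejected with a Limit of 3.
+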
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+                                         SDValue Chain, SDValue Dst,
+                                         SDValue Src, uint64_t Size,
+                                         unsigned Align, bool AlwaysInline,
+                                         const Value *DstSV, uint64_t DstSVOff,
+                                         const Value *SrcSV, uint64_t SrcSVOff){
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Expand memcpy to a series of load and store ops if the size operand falls
+  // below a certain threshold.
+  std::vector<EVT> MemOps;
+  uint64_t Limit = -1ULL;
+  if (!AlwaysInline)
+    Limit = TLI.getMaxStoresPerMemcpy();
+  unsigned DstAlign = Align;  // Destination alignment can change.
+  std::string Str;
+  bool CopyFromStr;
+  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+                                Str, CopyFromStr, DAG, TLI))
+    return SDValue();
+
+  bool isZeroStr = CopyFromStr && Str.empty();
+  SmallVector<SDValue, 8> OutChains;
+  unsigned NumMemOps = MemOps.size();
+  uint64_t SrcOff = 0, DstOff = 0;
+  for (unsigned i = 0; i != NumMemOps; ++i) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value, Store;
+
+    if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+      // It's unlikely a store of a vector immediate can be done in a single
+      // instruction. It would require a load from a constant pool first.
+      // We also handle storing a vector with all zero elements.
+      // FIXME: Handle other cases where a store of a vector immediate can be
+      // done in a single instruction.
+      Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+      Store = DAG.getStore(Chain, dl, Value,
+                           getMemBasePlusOffset(Dst, DstOff, DAG),
+                           DstSV, DstSVOff + DstOff, false, DstAlign);
+    } else {
+      // The type might not be legal for the target.  This should only happen
+      // if the type is smaller than a legal type, as on PPC, so the right
+      // thing to do is generate a LoadExt/StoreTrunc pair.  These simplify
+      // to Load/Store if NVT==VT.
+      // FIXME: Does the case above also need this?
+      EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+      assert(NVT.bitsGE(VT));
+      Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+                             getMemBasePlusOffset(Src, SrcOff, DAG),
+                             SrcSV, SrcSVOff + SrcOff, VT, false, Align);
+      Store = DAG.getTruncStore(Chain, dl, Value,
+                             getMemBasePlusOffset(Dst, DstOff, DAG),
+                             DstSV, DstSVOff + DstOff, VT, false, DstAlign);
+    }
+    OutChains.push_back(Store);
+    SrcOff += VTSize;
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+                                          SDValue Chain, SDValue Dst,
+                                          SDValue Src, uint64_t Size,
+                                          unsigned Align, bool AlwaysInline,
+                                          const Value *DstSV, uint64_t DstSVOff,
+                                          const Value *SrcSV, uint64_t SrcSVOff){
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Expand memmove to a series of load and store ops if the size operand falls
+  // below a certain threshold.
+  std::vector<EVT> MemOps;
+  uint64_t Limit = -1ULL;
+  if (!AlwaysInline)
+    Limit = TLI.getMaxStoresPerMemmove();
+  unsigned DstAlign = Align;  // Destination alignment can change.
+  std::string Str;
+  bool CopyFromStr;
+  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+                                Str, CopyFromStr, DAG, TLI))
+    return SDValue();
+
+  uint64_t SrcOff = 0, DstOff = 0;
+
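+  // Unlike the memcpy expansion above, the source and destination of a
+  // memmove may overlap, so issue all of the loads first, joined by a
+  // TokenFactor, and only then emit the stores.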
+  SmallVector<SDValue, 8> LoadValues;
+  SmallVector<SDValue, 8> LoadChains;
+  SmallVector<SDValue, 8> OutChains;
+  unsigned NumMemOps = MemOps.size();
+  for (unsigned i = 0; i < NumMemOps; i++) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value, Store;
+
+    Value = DAG.getLoad(VT, dl, Chain,
+                        getMemBasePlusOffset(Src, SrcOff, DAG),
+                        SrcSV, SrcSVOff + SrcOff, false, Align);
+    LoadValues.push_back(Value);
+    LoadChains.push_back(Value.getValue(1));
+    SrcOff += VTSize;
+  }
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                      &LoadChains[0], LoadChains.size());
+  for (unsigned i = 0; i < NumMemOps; i++) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value, Store;
+
+    Store = DAG.getStore(Chain, dl, LoadValues[i],
+                         getMemBasePlusOffset(Dst, DstOff, DAG),
+                         DstSV, DstSVOff + DstOff, false, DstAlign);
+    OutChains.push_back(Store);
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+                                 SDValue Chain, SDValue Dst,
+                                 SDValue Src, uint64_t Size,
+                                 unsigned Align,
+                                 const Value *DstSV, uint64_t DstSVOff) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Expand memset to a series of load/store ops if the size operand
+  // falls below a certain threshold.
+  std::vector<EVT> MemOps;
+  std::string Str;
+  bool CopyFromStr;
+  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
+                                Size, Align, Str, CopyFromStr, DAG, TLI))
+    return SDValue();
+
+  SmallVector<SDValue, 8> OutChains;
+  uint64_t DstOff = 0;
+
+  unsigned NumMemOps = MemOps.size();
+  for (unsigned i = 0; i < NumMemOps; i++) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+    SDValue Store = DAG.getStore(Chain, dl, Value,
+                                 getMemBasePlusOffset(Dst, DstOff, DAG),
+                                 DstSV, DstSVOff + DstOff);
+    OutChains.push_back(Store);
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
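+// Example: with i32 legal and a sufficiently aligned pointer, a call like
+// memset(p, 0xAB, 8) expands through the loop above into two i32 stores of
+// the splatted constant 0xABABABAB (see getMemsetValue), joined into a
+// single output chain by the TokenFactor.
+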
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+                                SDValue Src, SDValue Size,
+                                unsigned Align, bool AlwaysInline,
+                                const Value *DstSV, uint64_t DstSVOff,
+                                const Value *SrcSV, uint64_t SrcSVOff) {
+
+  // Check to see if we should lower the memcpy to loads and stores first.
+  // For cases within the target-specified limits, this is the best choice.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (ConstantSize) {
+    // Memcpy with size zero? Just return the original chain.
+    if (ConstantSize->isNullValue())
+      return Chain;
+
+    SDValue Result =
+      getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                              ConstantSize->getZExtValue(),
+                              Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+    if (Result.getNode())
+      return Result;
+  }
+
+  // Then check to see if we should lower the memcpy with target-specific
+  // code. If the target chooses to do this, this is the next best.
+  SDValue Result =
+    TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+                                AlwaysInline,
+                                DstSV, DstSVOff, SrcSV, SrcSVOff);
+  if (Result.getNode())
+    return Result;
+
+  // If we really need inline code and the target declined to provide it,
+  // use a (potentially long) sequence of loads and stores.
+  if (AlwaysInline) {
+    assert(ConstantSize && "AlwaysInline requires a constant size!");
+    return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                                   ConstantSize->getZExtValue(), Align, true,
+                                   DstSV, DstSVOff, SrcSV, SrcSVOff);
+  }
+
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+  Entry.Node = Dst; Args.push_back(Entry);
+  Entry.Node = Src; Args.push_back(Entry);
+  Entry.Node = Size; Args.push_back(Entry);
+  // FIXME: pass in DebugLoc
+  std::pair<SDValue,SDValue> CallResult =
+    TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+                    false, false, false, false, 0,
+                    TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
+                    /*isReturnValueUsed=*/false,
+                    getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
+                                      TLI.getPointerTy()),
+                    Args, *this, dl, GetOrdering(Chain.getNode()));
+  return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+                                 SDValue Src, SDValue Size,
+                                 unsigned Align,
+                                 const Value *DstSV, uint64_t DstSVOff,
+                                 const Value *SrcSV, uint64_t SrcSVOff) {
+
+  // Check to see if we should lower the memmove to loads and stores first.
+  // For cases within the target-specified limits, this is the best choice.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (ConstantSize) {
+    // Memmove with size zero? Just return the original chain.
+    if (ConstantSize->isNullValue())
+      return Chain;
+
+    SDValue Result =
+      getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+                               ConstantSize->getZExtValue(),
+                               Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+    if (Result.getNode())
+      return Result;
+  }
+
+  // Then check to see if we should lower the memmove with target-specific
+  // code. If the target chooses to do this, this is the next best.
+  SDValue Result =
+    TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align,
+                                 DstSV, DstSVOff, SrcSV, SrcSVOff);
+  if (Result.getNode())
+    return Result;
+
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+  Entry.Node = Dst; Args.push_back(Entry);
+  Entry.Node = Src; Args.push_back(Entry);
+  Entry.Node = Size; Args.push_back(Entry);
+  // FIXME: pass in DebugLoc
+  std::pair<SDValue,SDValue> CallResult =
+    TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+                    false, false, false, false, 0,
+                    TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
+                    /*isReturnValueUsed=*/false,
+                    getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
+                                      TLI.getPointerTy()),
+                    Args, *this, dl, GetOrdering(Chain.getNode()));
+  return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+                                SDValue Src, SDValue Size,
+                                unsigned Align,
+                                const Value *DstSV, uint64_t DstSVOff) {
+
+  // Check to see if we should lower the memset to stores first.
+  // For cases within the target-specified limits, this is the best choice.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (ConstantSize) {
+    // Memset with size zero? Just return the original chain.
+    if (ConstantSize->isNullValue())
+      return Chain;
+
+    SDValue Result =
+      getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+                      Align, DstSV, DstSVOff);
+    if (Result.getNode())
+      return Result;
+  }
+
+  // Then check to see if we should lower the memset with target-specific
+  // code. If the target chooses to do this, this is the next best.
+  SDValue Result =
+    TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align,
+                                DstSV, DstSVOff);
+  if (Result.getNode())
+    return Result;
+
+  // Emit a library call.
+  const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Dst; Entry.Ty = IntPtrTy;
+  Args.push_back(Entry);
+  // Extend or truncate the argument to be an i32 value for the call.
+  if (Src.getValueType().bitsGT(MVT::i32))
+    Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+  else
+    Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+  Entry.Node = Src;
+  Entry.Ty = Type::getInt32Ty(*getContext());
+  Entry.isSExt = true;
+  Args.push_back(Entry);
+  Entry.Node = Size;
+  Entry.Ty = IntPtrTy;
+  Entry.isSExt = false;
+  Args.push_back(Entry);
+  // FIXME: pass in DebugLoc
+  std::pair<SDValue,SDValue> CallResult =
+    TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+                    false, false, false, false, 0,
+                    TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
+                    /*isReturnValueUsed=*/false,
+                    getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+                                      TLI.getPointerTy()),
+                    Args, *this, dl, GetOrdering(Chain.getNode()));
+  return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+                                SDValue Chain,
+                                SDValue Ptr, SDValue Cmp,
+                                SDValue Swp, const Value* PtrVal,
+                                unsigned Alignment) {
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(MemVT);
+
+  // Check if the memory reference refers to a frame index.
+  if (!PtrVal)
+    if (const FrameIndexSDNode *FI =
+          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+      PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+  MachineFunction &MF = getMachineFunction();
+  unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered to be volatile.
+  Flags |= MachineMemOperand::MOVolatile;
+
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(PtrVal, Flags, 0,
+                            MemVT.getStoreSize(), Alignment);
+
+  return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+                                SDValue Chain,
+                                SDValue Ptr, SDValue Cmp,
+                                SDValue Swp, MachineMemOperand *MMO) {
+  assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+  assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+  EVT VT = Cmp.getValueType();
+
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  FoldingSetNodeID ID;
+  ID.AddInteger(MemVT.getRawBits());
+  SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+  AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<AtomicSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
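+// Usage sketch (assumes Chain, Ptr, Cmp, Swp and PtrVal are in scope):
+// lowering an i32 compare-and-swap would build
+//   SDValue Res = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, MVT::i32, Chain,
+//                               Ptr, Cmp, Swp, PtrVal, /*Alignment=*/0);
+// where Res.getValue(0) is the value loaded from memory and Res.getValue(1)
+// is the output chain, matching the (VT, MVT::Other) VT list created above.
+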
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+                                SDValue Chain,
+                                SDValue Ptr, SDValue Val,
+                                const Value* PtrVal,
+                                unsigned Alignment) {
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(MemVT);
+
+  // Check if the memory reference refers to a frame index.
+  if (!PtrVal)
+    if (const FrameIndexSDNode *FI =
+          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+      PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+  MachineFunction &MF = getMachineFunction();
+  unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered to be volatile.
+  Flags |= MachineMemOperand::MOVolatile;
+
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(PtrVal, Flags, 0,
+                            MemVT.getStoreSize(), Alignment);
+
+  return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+                                SDValue Chain,
+                                SDValue Ptr, SDValue Val,
+                                MachineMemOperand *MMO) {
+  assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+          Opcode == ISD::ATOMIC_LOAD_SUB ||
+          Opcode == ISD::ATOMIC_LOAD_AND ||
+          Opcode == ISD::ATOMIC_LOAD_OR ||
+          Opcode == ISD::ATOMIC_LOAD_XOR ||
+          Opcode == ISD::ATOMIC_LOAD_NAND ||
+          Opcode == ISD::ATOMIC_LOAD_MIN ||
+          Opcode == ISD::ATOMIC_LOAD_MAX ||
+          Opcode == ISD::ATOMIC_LOAD_UMIN ||
+          Opcode == ISD::ATOMIC_LOAD_UMAX ||
+          Opcode == ISD::ATOMIC_SWAP) &&
+         "Invalid Atomic Op");
+
+  EVT VT = Val.getValueType();
+
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  FoldingSetNodeID ID;
+  ID.AddInteger(MemVT.getRawBits());
+  SDValue Ops[] = {Chain, Ptr, Val};
+  AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<AtomicSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+/// May return something simpler (the sole operand) when there is only one.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
+                                     DebugLoc dl) {
+  if (NumOps == 1)
+    return Ops[0];
+
+  SmallVector<EVT, 4> VTs;
+  VTs.reserve(NumOps);
+  for (unsigned i = 0; i < NumOps; ++i)
+    VTs.push_back(Ops[i].getValueType());
+  return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
+                 Ops, NumOps);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
+                                  const EVT *VTs, unsigned NumVTs,
+                                  const SDValue *Ops, unsigned NumOps,
+                                  EVT MemVT, const Value *srcValue, int SVOff,
+                                  unsigned Align, bool Vol,
+                                  bool ReadMem, bool WriteMem) {
+  return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
+                             MemVT, srcValue, SVOff, Align, Vol,
+                             ReadMem, WriteMem);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+                                  const SDValue *Ops, unsigned NumOps,
+                                  EVT MemVT, const Value *srcValue, int SVOff,
+                                  unsigned Align, bool Vol,
+                                  bool ReadMem, bool WriteMem) {
+  if (Align == 0)  // Ensure that codegen never sees alignment 0
+    Align = getEVTAlignment(MemVT);
+
+  MachineFunction &MF = getMachineFunction();
+  unsigned Flags = 0;
+  if (WriteMem)
+    Flags |= MachineMemOperand::MOStore;
+  if (ReadMem)
+    Flags |= MachineMemOperand::MOLoad;
+  if (Vol)
+    Flags |= MachineMemOperand::MOVolatile;
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(srcValue, Flags, SVOff,
+                            MemVT.getStoreSize(), Align);
+
+  return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+                                  const SDValue *Ops, unsigned NumOps,
+                                  EVT MemVT, MachineMemOperand *MMO) {
+  assert((Opcode == ISD::INTRINSIC_VOID ||
+          Opcode == ISD::INTRINSIC_W_CHAIN ||
+          (Opcode <= INT_MAX &&
+           (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+         "Opcode is not a memory-accessing opcode!");
+
+  // Memoize the node unless it returns a flag.
+  MemIntrinsicSDNode *N;
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+      return SDValue(E, 0);
+    }
+
+    N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
+    new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
+    new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+  }
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
+                      ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
+                      SDValue Ptr, SDValue Offset,
+                      const Value *SV, int SVOffset, EVT MemVT,
+                      bool isVolatile, unsigned Alignment) {
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(VT);
+
+  // Check if the memory reference refers to a frame index.
+  if (!SV)
+    if (const FrameIndexSDNode *FI =
+          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+      SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+  MachineFunction &MF = getMachineFunction();
+  unsigned Flags = MachineMemOperand::MOLoad;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(SV, Flags, SVOffset,
+                            MemVT.getStoreSize(), Alignment);
+  return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
+                      ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
+                      SDValue Ptr, SDValue Offset, EVT MemVT,
+                      MachineMemOperand *MMO) {
+  if (VT == MemVT) {
+    ExtType = ISD::NON_EXTLOAD;
+  } else if (ExtType == ISD::NON_EXTLOAD) {
+    assert(VT == MemVT && "Non-extending load from different memory type!");
+  } else {
+    // Extending load.
+    assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+           "Should only be an extending load, not truncating!");
+    assert(VT.isInteger() == MemVT.isInteger() &&
+           "Cannot convert from FP to Int or Int to FP!");
+    assert(VT.isVector() == MemVT.isVector() &&
+           "Cannot use an extending load to convert to or from a vector!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+           "Cannot use an extending load to change the number of vector "
+           "elements!");
+  }
+
+  bool Indexed = AM != ISD::UNINDEXED;
+  assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+         "Unindexed load with an offset!");
+
+  SDVTList VTs = Indexed ?
+    getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile()));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<LoadSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
+  new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
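+// Note: for an indexed load the VT list above also carries the pointer
+// type, so e.g. a pre-incremented load produces three results:
+//   result 0: the loaded value
+//   result 1: the updated base pointer (Base +/- Offset)
+//   result 2: the output chain
+// An unindexed load produces only the value and the chain.
+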
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
+                              SDValue Chain, SDValue Ptr,
+                              const Value *SV, int SVOffset,
+                              bool isVolatile, unsigned Alignment) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef,
+                 SV, SVOffset, VT, isVolatile, Alignment);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+                                 SDValue Chain, SDValue Ptr,
+                                 const Value *SV,
+                                 int SVOffset, EVT MemVT,
+                                 bool isVolatile, unsigned Alignment) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
+                 SV, SVOffset, MemVT, isVolatile, Alignment);
+}
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+                             SDValue Offset, ISD::MemIndexedMode AM) {
+  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already a indexed load!");
+  return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(),
+                 LD->getChain(), Base, Offset, LD->getSrcValue(),
+                 LD->getSrcValueOffset(), LD->getMemoryVT(),
+                 LD->isVolatile(), LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                               SDValue Ptr, const Value *SV, int SVOffset,
+                               bool isVolatile, unsigned Alignment) {
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(Val.getValueType());
+
+  // Check if the memory reference refers to a frame index.
+  if (!SV)
+    if (const FrameIndexSDNode *FI =
+          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+      SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+  MachineFunction &MF = getMachineFunction();
+  unsigned Flags = MachineMemOperand::MOStore;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(SV, Flags, SVOffset,
+                            Val.getValueType().getStoreSize(), Alignment);
+
+  return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                               SDValue Ptr, MachineMemOperand *MMO) {
+  EVT VT = Val.getValueType();
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile()));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+  new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                                    SDValue Ptr, const Value *SV,
+                                    int SVOffset, EVT SVT,
+                                    bool isVolatile, unsigned Alignment) {
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(SVT);
+
+  // Check if the memory reference refers to a frame index.
+  if (!SV)
+    if (const FrameIndexSDNode *FI =
+          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+      SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+  MachineFunction &MF = getMachineFunction();
+  unsigned Flags = MachineMemOperand::MOStore;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
+
+  return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                                    SDValue Ptr, EVT SVT,
+                                    MachineMemOperand *MMO) {
+  EVT VT = Val.getValueType();
+
+  if (VT == SVT)
+    return getStore(Chain, dl, Val, Ptr, MMO);
+
+  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+         "Should only be a truncating store, not extending!");
+  assert(VT.isInteger() == SVT.isInteger() &&
+         "Can't do FP-INT conversion!");
+  assert(VT.isVector() == SVT.isVector() &&
+         "Cannot use trunc store to convert to or from a vector!");
+  assert((!VT.isVector() ||
+          VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+         "Cannot use trunc store to change the number of vector elements!");
+
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(SVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile()));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+  new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+                              SDValue Offset, ISD::MemIndexedMode AM) {
+  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already a indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ST->getMemoryVT().getRawBits());
+  ID.AddInteger(ST->getRawSubclassData());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+  new (N) StoreSDNode(Ops, dl, VTs, AM,
+                      ST->isTruncatingStore(), ST->getMemoryVT(),
+                      ST->getMemOperand());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
+                               SDValue Chain, SDValue Ptr,
+                               SDValue SV) {
+  SDValue Ops[] = { Chain, Ptr, SV };
+  return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              const SDUse *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, DL, VT);
+  case 1: return getNode(Opcode, DL, VT, Ops[0]);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+
+  // Copy from an SDUse array into an SDValue array for use with
+  // the regular getNode logic.
+  SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+  return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              const SDValue *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, DL, VT);
+  case 1: return getNode(Opcode, DL, VT, Ops[0]);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+
+  switch (Opcode) {
+  default: break;
+  case ISD::SELECT_CC: {
+    assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+    assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+           "LHS and RHS of condition must have same type!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "True and False arms of SelectCC must have same type!");
+    assert(Ops[2].getValueType() == VT &&
+           "select_cc node must be of same type as true and false value!");
+    break;
+  }
+  case ISD::BR_CC: {
+    assert(NumOps == 5 && "BR_CC takes 5 operands!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "LHS/RHS of comparison should match types!");
+    break;
+  }
+  }
+
+  // Memoize nodes.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+
+  if (VT != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+    void *IP = 0;
+
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = NodeAllocator.Allocate<SDNode>();
+    new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<SDNode>();
+    new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              const std::vector<EVT> &ResultTys,
+                              const SDValue *Ops, unsigned NumOps) {
+  return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+                 Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              const EVT *VTs, unsigned NumVTs,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (NumVTs == 1)
+    return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+  return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (VTList.NumVTs == 1)
+    return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+#if 0
+  switch (Opcode) {
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:
+  case ISD::SHL_PARTS:
+    if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+    else if (N3.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+        unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+          return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+      }
+    break;
+  }
+#endif
+
+  // Memoize the node unless it returns a flag.
+  SDNode *N;
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    if (NumOps == 1) {
+      N = NodeAllocator.Allocate<UnarySDNode>();
+      new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+    } else if (NumOps == 2) {
+      N = NodeAllocator.Allocate<BinarySDNode>();
+      new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+    } else if (NumOps == 3) {
+      N = NodeAllocator.Allocate<TernarySDNode>();
+      new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+    } else {
+      N = NodeAllocator.Allocate<SDNode>();
+      new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+    }
+    CSEMap.InsertNode(N, IP);
+  } else {
+    if (NumOps == 1) {
+      N = NodeAllocator.Allocate<UnarySDNode>();
+      new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+    } else if (NumOps == 2) {
+      N = NodeAllocator.Allocate<BinarySDNode>();
+      new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+    } else if (NumOps == 3) {
+      N = NodeAllocator.Allocate<TernarySDNode>();
+      new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+    } else {
+      N = NodeAllocator.Allocate<SDNode>();
+      new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+    }
+  }
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) {
+  return getNode(Opcode, DL, VTList, 0, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              SDValue N1) {
+  SDValue Ops[] = { N1 };
+  return getNode(Opcode, DL, VTList, Ops, 1);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              SDValue N1, SDValue N2) {
+  SDValue Ops[] = { N1, N2 };
+  return getNode(Opcode, DL, VTList, Ops, 2);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              SDValue N1, SDValue N2, SDValue N3) {
+  SDValue Ops[] = { N1, N2, N3 };
+  return getNode(Opcode, DL, VTList, Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              SDValue N1, SDValue N2, SDValue N3,
+                              SDValue N4) {
+  SDValue Ops[] = { N1, N2, N3, N4 };
+  return getNode(Opcode, DL, VTList, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              SDValue N1, SDValue N2, SDValue N3,
+                              SDValue N4, SDValue N5) {
+  SDValue Ops[] = { N1, N2, N3, N4, N5 };
+  return getNode(Opcode, DL, VTList, Ops, 5);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT) {
+  return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
+  for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+       E = VTList.rend(); I != E; ++I)
+    if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
+      return *I;
+
+  EVT *Array = Allocator.Allocate<EVT>(2);
+  Array[0] = VT1;
+  Array[1] = VT2;
+  SDVTList Result = makeVTList(Array, 2);
+  VTList.push_back(Result);
+  return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
+  for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+       E = VTList.rend(); I != E; ++I)
+    if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+                          I->VTs[2] == VT3)
+      return *I;
+
+  EVT *Array = Allocator.Allocate<EVT>(3);
+  Array[0] = VT1;
+  Array[1] = VT2;
+  Array[2] = VT3;
+  SDVTList Result = makeVTList(Array, 3);
+  VTList.push_back(Result);
+  return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
+  for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+       E = VTList.rend(); I != E; ++I)
+    if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+                          I->VTs[2] == VT3 && I->VTs[3] == VT4)
+      return *I;
+
+  EVT *Array = Allocator.Allocate<EVT>(4);
+  Array[0] = VT1;
+  Array[1] = VT2;
+  Array[2] = VT3;
+  Array[3] = VT4;
+  SDVTList Result = makeVTList(Array, 4);
+  VTList.push_back(Result);
+  return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
+  switch (NumVTs) {
+    case 0: llvm_unreachable("Cannot have nodes without results!");
+    case 1: return getVTList(VTs[0]);
+    case 2: return getVTList(VTs[0], VTs[1]);
+    case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+    case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]);
+    default: break;
+  }
+
+  for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+       E = VTList.rend(); I != E; ++I) {
+    if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+      continue;
+
+    bool NoMatch = false;
+    for (unsigned i = 2; i != NumVTs; ++i)
+      if (VTs[i] != I->VTs[i]) {
+        NoMatch = true;
+        break;
+      }
+    if (!NoMatch)
+      return *I;
+  }
+
+  EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+  std::copy(VTs, VTs+NumVTs, Array);
+  SDVTList Result = makeVTList(Array, NumVTs);
+  VTList.push_back(Result);
+  return Result;
+}
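+
+// Editorial sketch (not part of this check-in): getVTList interns each
+// distinct combination of value types, so repeated requests yield the same
+// pooled array.  'DAG' is a hypothetical SelectionDAG reference:
+//   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+//   assert(VTs.VTs == DAG.getVTList(MVT::i32, MVT::Other).VTs);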
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands.  If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists.  If the resultant node does not exist in the DAG, the
+/// input node is returned.  As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
+  SDNode *N = InN.getNode();
+  assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+  // Check to see if there is no change.
+  if (Op == N->getOperand(0)) return InN;
+
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+    return SDValue(Existing, InN.getResNo());
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    if (!RemoveNodeFromCSEMaps(N))
+      InsertPos = 0;
+
+  // Now we update the operands.
+  N->OperandList[0].set(Op);
+
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
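+
+// Editorial sketch (not part of this check-in): callers must use the return
+// value rather than assuming the node was mutated, since the update may be
+// CSE'd into a pre-existing node.  'N' and 'NewOp' are hypothetical:
+//   SDValue Res = DAG.UpdateNodeOperands(N, NewOp);
+//   // Res is either N itself or an equivalent node that already existed.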
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
+  SDNode *N = InN.getNode();
+  assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+  // Check to see if there is no change.
+  if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+    return InN;   // No operands changed, just return the input node.
+
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+    return SDValue(Existing, InN.getResNo());
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    if (!RemoveNodeFromCSEMaps(N))
+      InsertPos = 0;
+
+  // Now we update the operands.
+  if (N->OperandList[0] != Op1)
+    N->OperandList[0].set(Op1);
+  if (N->OperandList[1] != Op2)
+    N->OperandList[1].set(Op2);
+
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) {
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+                   SDValue Op3, SDValue Op4) {
+  SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+  return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+                   SDValue Op3, SDValue Op4, SDValue Op5) {
+  SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+  return UpdateNodeOperands(N, Ops, 5);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
+  SDNode *N = InN.getNode();
+  assert(N->getNumOperands() == NumOps &&
+         "Update with wrong number of operands");
+
+  // Check to see if there is no change.
+  bool AnyChange = false;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    if (Ops[i] != N->getOperand(i)) {
+      AnyChange = true;
+      break;
+    }
+  }
+
+  // No operands changed, just return the input node.
+  if (!AnyChange) return InN;
+
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+    return SDValue(Existing, InN.getResNo());
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    if (!RemoveNodeFromCSEMaps(N))
+      InsertPos = 0;
+
+  // Now we update the operands.
+  for (unsigned i = 0; i != NumOps; ++i)
+    if (N->OperandList[i] != Ops[i])
+      N->OperandList[i].set(Ops[i]);
+
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+  // Unlike the code in MorphNodeTo that does this, we don't need to
+  // watch for dead nodes here.
+  for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+    SDUse &Use = *I++;
+    Use.set(SDValue());
+  }
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT) {
+  SDVTList VTs = getVTList(VT);
+  return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT, SDValue Op1) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT, SDValue Op1,
+                                   SDValue Op2) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1, Op2 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT, SDValue Op1,
+                                   SDValue Op2, SDValue Op3) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT, const SDValue *Ops,
+                                   unsigned NumOps) {
+  SDVTList VTs = getVTList(VT);
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2, const SDValue *Ops,
+                                   unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2, EVT VT3,
+                                   const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+                                   const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2,
+                                   SDValue Op1) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2,
+                                   SDValue Op1, SDValue Op2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1, Op2 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2,
+                                   SDValue Op1, SDValue Op2,
+                                   SDValue Op3) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2, EVT VT3,
+                                   SDValue Op1, SDValue Op2,
+                                   SDValue Op3) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   SDVTList VTs, const SDValue *Ops,
+                                   unsigned NumOps) {
+  return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+}
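+
+// Editorial note (not part of this check-in): the bitwise complement above
+// matches the machine-opcode encoding used elsewhere in this file; see
+// getMachineNode below, which also constructs MachineSDNodes with ~Opcode.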
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT) {
+  SDVTList VTs = getVTList(VT);
+  return MorphNodeTo(N, Opc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT, SDValue Op1) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1 };
+  return MorphNodeTo(N, Opc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT, SDValue Op1,
+                                  SDValue Op2) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1, Op2 };
+  return MorphNodeTo(N, Opc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT, SDValue Op1,
+                                  SDValue Op2, SDValue Op3) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return MorphNodeTo(N, Opc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT, const SDValue *Ops,
+                                  unsigned NumOps) {
+  SDVTList VTs = getVTList(VT);
+  return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT1, EVT VT2, const SDValue *Ops,
+                                  unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT1, EVT VT2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT1, EVT VT2, EVT VT3,
+                                  const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT1, EVT VT2,
+                                  SDValue Op1) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1 };
+  return MorphNodeTo(N, Opc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT1, EVT VT2,
+                                  SDValue Op1, SDValue Op2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1, Op2 };
+  return MorphNodeTo(N, Opc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  EVT VT1, EVT VT2,
+                                  SDValue Op1, SDValue Op2,
+                                  SDValue Op3) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return MorphNodeTo(N, Opc, VTs, Ops, 3);
+}
+
+/// MorphNodeTo - These *mutate* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node.  If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.  Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  SDVTList VTs, const SDValue *Ops,
+                                  unsigned NumOps) {
+  // If an identical node already exists, use it.
+  void *IP = 0;
+  if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+    if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return ON;
+  }
+
+  if (!RemoveNodeFromCSEMaps(N))
+    IP = 0;
+
+  // Start the morphing.
+  N->NodeType = Opc;
+  N->ValueList = VTs.VTs;
+  N->NumValues = VTs.NumVTs;
+
+  // Clear the operands list, updating used nodes to remove this from their
+  // use list.  Keep track of any operands that become dead as a result.
+  SmallPtrSet<SDNode*, 16> DeadNodeSet;
+  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+    SDUse &Use = *I++;
+    SDNode *Used = Use.getNode();
+    Use.set(SDValue());
+    if (Used->use_empty())
+      DeadNodeSet.insert(Used);
+  }
+
+  if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
+    // Initialize the memory references information.
+    MN->setMemRefs(0, 0);
+    // If NumOps is larger than the # of operands we currently have, or the
+    // existing operand list isn't one we can reuse, reallocate it.
+    if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
+      if (MN->OperandsNeedDelete)
+        delete[] MN->OperandList;
+      if (NumOps > array_lengthof(MN->LocalOperands))
+        // We're creating a final node that will live unmorphed for the
+        // remainder of the current SelectionDAG iteration, so we can allocate
+        // the operands directly out of a pool with no recycling metadata.
+        MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+                        Ops, NumOps);
+      else
+        MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+      MN->OperandsNeedDelete = false;
+    } else
+      MN->InitOperands(MN->OperandList, Ops, NumOps);
+  } else {
+    // If NumOps is larger than the # of operands we currently have, reallocate
+    // the operand list.
+    if (NumOps > N->NumOperands) {
+      if (N->OperandsNeedDelete)
+        delete[] N->OperandList;
+      N->InitOperands(new SDUse[NumOps], Ops, NumOps);
+      N->OperandsNeedDelete = true;
+    } else
+      N->InitOperands(N->OperandList, Ops, NumOps);
+  }
+
+  // Delete any nodes that are still dead after adding the uses for the
+  // new operands.
+  SmallVector<SDNode *, 16> DeadNodes;
+  for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+       E = DeadNodeSet.end(); I != E; ++I)
+    if ((*I)->use_empty())
+      DeadNodes.push_back(*I);
+  RemoveDeadNodes(DeadNodes);
+
+  if (IP)
+    CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
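+
+// Editorial sketch (not part of this check-in): a typical instruction
+// selector reaches this through SelectNodeTo to rewrite an ISD node into a
+// machine node in place.  'N', 'TargetOpc', 'Op0', and 'Op1' are
+// hypothetical:
+//   SDNode *Res = DAG.SelectNodeTo(N, TargetOpc, MVT::i32, Op0, Op1);
+//   // Res == N unless an identical node already existed in the CSE map.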
+
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node.  If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
+  SDVTList VTs = getVTList(VT);
+  return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1 };
+  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+                             SDValue Op1, SDValue Op2) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1, Op2 };
+  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+                             SDValue Op1, SDValue Op2, SDValue Op3) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+                             const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT);
+  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+                             EVT VT1, EVT VT2, SDValue Op1) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1 };
+  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+                             EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1, Op2 };
+  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+                             EVT VT1, EVT VT2, SDValue Op1,
+                             SDValue Op2, SDValue Op3) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+                             EVT VT1, EVT VT2,
+                             const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+                             EVT VT1, EVT VT2, EVT VT3,
+                             SDValue Op1, SDValue Op2) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  SDValue Ops[] = { Op1, Op2 };
+  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+                             EVT VT1, EVT VT2, EVT VT3,
+                             SDValue Op1, SDValue Op2, SDValue Op3) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+                             EVT VT1, EVT VT2, EVT VT3,
+                             const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+                             EVT VT2, EVT VT3, EVT VT4,
+                             const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+                             const std::vector<EVT> &ResultTys,
+                             const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
+  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+                             const SDValue *Ops, unsigned NumOps) {
+  bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag;
+  MachineSDNode *N;
+  void *IP;
+
+  if (DoCSE) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
+    IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return cast<MachineSDNode>(E);
+  }
+
+  // Allocate a new MachineSDNode.
+  N = NodeAllocator.Allocate<MachineSDNode>();
+  new (N) MachineSDNode(~Opcode, DL, VTs);
+
+  // Initialize the operands list.
+  if (NumOps > array_lengthof(N->LocalOperands))
+    // We're creating a final node that will live unmorphed for the
+    // remainder of the current SelectionDAG iteration, so we can allocate
+    // the operands directly out of a pool with no recycling metadata.
+    N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+                    Ops, NumOps);
+  else
+    N->InitOperands(N->LocalOperands, Ops, NumOps);
+  N->OperandsNeedDelete = false;
+
+  if (DoCSE)
+    CSEMap.InsertNode(N, IP);
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return N;
+}
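+
+// Editorial sketch (not part of this check-in): creating a machine node with
+// no operands, assuming the usual TargetOpcode::IMPLICIT_DEF pseudo; 'DAG'
+// and 'dl' are hypothetical:
+//   MachineSDNode *Def =
+//     DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i32);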
+
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetOpcode::EXTRACT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT,
+                                     SDValue Operand) {
+  SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+  SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+                                  VT, Operand, SRIdxVal);
+  return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetOpcode::INSERT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
+                                    SDValue Operand, SDValue Subreg) {
+  SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+  SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+                                  VT, Operand, Subreg, SRIdxVal);
+  return SDValue(Result, 0);
+}
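+
+// Editorial sketch (not part of this check-in): extracting the low half of a
+// 64-bit value; 'SubRegIdx' stands in for a target-specific subregister
+// index and 'Val64' for a hypothetical i64 value:
+//   SDValue Lo = DAG.getTargetExtractSubreg(SubRegIdx, dl, MVT::i32, Val64);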
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+                                      const SDValue *Ops, unsigned NumOps) {
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return E;
+  }
+  return NULL;
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
+                                      DAGUpdateListener *UpdateListener) {
+  SDNode *From = FromN.getNode();
+  assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+         "Cannot replace with this method!");
+  assert(From != To.getNode() && "Cannot replace uses of with self");
+
+  // Iterate over all the existing uses of From. New uses will be added
+  // to the beginning of the use list, which we avoid visiting.
+  // This specifically avoids visiting uses of From that arise while the
+  // replacement is happening, because any such uses would be the result
+  // of CSE: If an existing node looks like From after one of its operands
+  // is replaced by To, we don't want to go on and replace all of its uses
+  // with To too.  See PR3018 for more info.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      ++UI;
+      Use.set(To);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
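+
+// Editorial sketch (not part of this check-in): after building a replacement
+// value, a client redirects every user of the old one; 'Old' and 'New' are
+// hypothetical single-result values of the same type, and 0 is passed when
+// no DAGUpdateListener needs to be informed:
+//   DAG.ReplaceAllUsesWith(Old, New, 0);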
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+                                      DAGUpdateListener *UpdateListener) {
+#ifndef NDEBUG
+  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+    assert((!From->hasAnyUseOfValue(i) ||
+            From->getValueType(i) == To->getValueType(i)) &&
+           "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+  // Handle the trivial case.
+  if (From == To)
+    return;
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      ++UI;
+      Use.setNode(To);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values.  To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+                                      const SDValue *To,
+                                      DAGUpdateListener *UpdateListener) {
+  if (From->getNumValues() == 1)  // Handle the simple case efficiently.
+    return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener);
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      const SDValue &ToOp = To[Use.getResNo()];
+      ++UI;
+      Use.set(ToOp);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone.  The
+/// UpdateListener is notified of changed and deleted nodes, as in
+/// ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
+                                             DAGUpdateListener *UpdateListener){
+  // Handle the really simple, really trivial case efficiently.
+  if (From == To) return;
+
+  // Handle the simple, trivial case efficiently.
+  if (From.getNode()->getNumValues() == 1) {
+    ReplaceAllUsesWith(From, To, UpdateListener);
+    return;
+  }
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From.getNode()->use_begin(),
+                       UE = From.getNode()->use_end();
+  while (UI != UE) {
+    SDNode *User = *UI;
+    bool UserRemovedFromCSEMaps = false;
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+
+      // Skip uses of different values from the same node.
+      if (Use.getResNo() != From.getResNo()) {
+        ++UI;
+        continue;
+      }
+
+      // If this node hasn't been modified yet, it's still in the CSE maps,
+      // so remove its old self from the CSE maps.
+      if (!UserRemovedFromCSEMaps) {
+        RemoveNodeFromCSEMaps(User);
+        UserRemovedFromCSEMaps = true;
+      }
+
+      ++UI;
+      Use.set(To);
+    } while (UI != UE && *UI == User);
+
+    // We are iterating over all uses of the From node, so if a use
+    // doesn't use the specific value, no changes are made.
+    if (!UserRemovedFromCSEMaps)
+      continue;
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
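+
+// Editorial sketch (not part of this check-in): replacing just one result of
+// a multi-result node, e.g. redirecting a load's chain output (result 1)
+// while leaving its loaded value (result 0) alone; 'Ld' and 'NewChain' are
+// hypothetical:
+//   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain, 0);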
+
+namespace {
+  /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+  /// to record information about a use.
+  struct UseMemo {
+    SDNode *User;
+    unsigned Index;
+    SDUse *Use;
+  };
+
+  /// operator< - Sort Memos by User.
+  bool operator<(const UseMemo &L, const UseMemo &R) {
+    return (intptr_t)L.User < (intptr_t)R.User;
+  }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of the From values with the
+/// corresponding To values, leaving uses of other values produced by the
+/// same nodes alone.  The same value may appear in both the From and To
+/// lists.  The UpdateListener is notified of changed and deleted nodes, as
+/// in ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+                                              const SDValue *To,
+                                              unsigned Num,
+                                              DAGUpdateListener *UpdateListener){
+  // Handle the simple, trivial case efficiently.
+  if (Num == 1)
+    return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener);
+
+  // Record all the uses up front, so that uses introduced during the
+  // replacement process itself are not visited.
+  SmallVector<UseMemo, 4> Uses;
+  for (unsigned i = 0; i != Num; ++i) {
+    unsigned FromResNo = From[i].getResNo();
+    SDNode *FromNode = From[i].getNode();
+    for (SDNode::use_iterator UI = FromNode->use_begin(),
+         E = FromNode->use_end(); UI != E; ++UI) {
+      SDUse &Use = UI.getUse();
+      if (Use.getResNo() == FromResNo) {
+        UseMemo Memo = { *UI, i, &Use };
+        Uses.push_back(Memo);
+      }
+    }
+  }
+
+  // Sort the uses, so that all the uses from a given User are together.
+  std::sort(Uses.begin(), Uses.end());
+
+  for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+       UseIndex != UseIndexEnd; ) {
+    // We know that this user uses some value of From.  If it is the right
+    // value, update it.
+    SDNode *User = Uses[UseIndex].User;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // The Uses array is sorted, so all the uses for a given User
+    // are next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      unsigned i = Uses[UseIndex].Index;
+      SDUse &Use = *Uses[UseIndex].Use;
+      ++UseIndex;
+
+      Use.set(To[i]);
+    } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order.  It sorts the AllNodes list into that
+/// order in place and returns the number of nodes assigned.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+  unsigned DAGSize = 0;
+
+  // SortedPos tracks the progress of the algorithm. Nodes before it are
+  // sorted, nodes after it are unsorted. When the algorithm completes
+  // it is at the end of the list.
+  allnodes_iterator SortedPos = allnodes_begin();
+
+  // Visit all the nodes. Move nodes with no operands to the front of
+  // the list immediately. Annotate nodes that do have operands with their
+  // operand count. Before we do this, the Node Id fields of the nodes
+  // may contain arbitrary values. After, the Node Id fields for nodes
+  // before SortedPos will contain the topological sort index, and the
+  // Node Id fields for nodes at SortedPos and after will contain the
+  // count of outstanding operands.
+  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+    SDNode *N = I++;
+    checkForCycles(N);
+    unsigned Degree = N->getNumOperands();
+    if (Degree == 0) {
+      // A node with no operands: move it into sorted position immediately.
+      N->setNodeId(DAGSize++);
+      allnodes_iterator Q = N;
+      if (Q != SortedPos)
+        SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+      assert(SortedPos != AllNodes.end() && "Overran node list");
+      ++SortedPos;
+    } else {
+      // Temporarily use the Node Id as scratch space for the degree count.
+      N->setNodeId(Degree);
+    }
+  }
+
+  // Visit all the nodes. As we iterate, move nodes into sorted order,
+  // such that by the time the end is reached all nodes will be sorted.
+  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+    SDNode *N = I;
+    checkForCycles(N);
+    // N is in sorted position, so each of its users has one fewer operand
+    // that still needs to be sorted.
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI) {
+      SDNode *P = *UI;
+      unsigned Degree = P->getNodeId();
+      assert(Degree != 0 && "Invalid node degree");
+      --Degree;
+      if (Degree == 0) {
+        // All of P's operands are sorted, so P may be sorted now.
+        P->setNodeId(DAGSize++);
+        if (P != SortedPos)
+          SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+        assert(SortedPos != AllNodes.end() && "Overran node list");
+        ++SortedPos;
+      } else {
+        // Update P's outstanding operand count.
+        P->setNodeId(Degree);
+      }
+    }
+    if (I == SortedPos) {
+#ifndef NDEBUG
+      SDNode *S = ++I;
+      dbgs() << "Overran sorted position:\n";
+      S->dumprFull();
+#endif
+      llvm_unreachable(0);
+    }
+  }
+
+  assert(SortedPos == AllNodes.end() &&
+         "Topological sort incomplete!");
+  assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+         "First node in topological sort is not the entry token!");
+  assert(AllNodes.front().getNodeId() == 0 &&
+         "First node in topological sort has non-zero id!");
+  assert(AllNodes.front().getNumOperands() == 0 &&
+         "First node in topological sort has operands!");
+  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+         "Last node in topologic sort has unexpected id!");
+  assert(AllNodes.back().use_empty() &&
+         "Last node in topologic sort has users!");
+  assert(DAGSize == allnodes_size() && "Node count mismatch!");
+  return DAGSize;
+}
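+
+// Editorial sketch (not part of this check-in): once the list is sorted,
+// clients can walk it knowing every operand precedes its users:
+//   DAG.AssignTopologicalOrder();
+//   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+//        E = DAG.allnodes_end(); I != E; ++I) {
+//     // All operands of *I have already been visited here.
+//   }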
+
+/// AssignOrdering - Assign an order to the SDNode.
+void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) {
+  assert(SD && "Trying to assign an order to a null node!");
+  Ordering->add(SD, Order);
+}
+
+/// GetOrdering - Get the order for the SDNode.
+unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
+  assert(SD && "Trying to get the order of a null node!");
+  return Ordering->getOrder(SD);
+}
+
+
+//===----------------------------------------------------------------------===//
+//                              SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+  DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
+                                         EVT VT, int64_t o, unsigned char TF)
+  : SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)),
+    Offset(o), TargetFlags(TF) {
+  TheGlobal = const_cast<GlobalValue*>(GA);
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
+                     MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+  assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+  assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+                     const SDValue *Ops, unsigned NumOps, EVT memvt, 
+                     MachineMemOperand *mmo)
+   : SDNode(Opc, dl, VTs, Ops, NumOps),
+     MemoryVT(memvt), MMO(mmo) {
+  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+  assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+  assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+  AddNodeIDNode(ID, this);
+}
+
+namespace {
+  struct EVTArray {
+    std::vector<EVT> VTs;
+    
+    EVTArray() {
+      VTs.reserve(MVT::LAST_VALUETYPE);
+      for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+        VTs.push_back(MVT((MVT::SimpleValueType)i));
+    }
+  };
+}
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
+static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+  if (VT.isExtended()) {
+    sys::SmartScopedLock<true> Lock(*VTMutex);
+    return &(*EVTs->insert(VT).first);
+  } else {
+    return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+  }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value.  This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  // TODO: Only iterate over uses of a given value of the node
+  for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+    if (UI.getUse().getResNo() == Value) {
+      if (NUses == 0)
+        return false;
+      --NUses;
+    }
+  }
+
+  // Found exactly the right number of uses?
+  return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there are any uses of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+    if (UI.getUse().getResNo() == Value)
+      return true;
+
+  return false;
+}
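+
+// Editorial sketch (not part of this check-in): querying whether a load's
+// chain result (result 1) has any users; 'Ld' is a hypothetical load node:
+//   if (Ld->hasAnyUseOfValue(1)) {
+//     // Something depends on the load's output chain.
+//   }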
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+  bool Seen = false;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDNode *User = *I;
+    if (User == this)
+      Seen = true;
+    else
+      return false;
+  }
+
+  return Seen;
+}
+
+/// isOperandOf - Return true if this value is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (*this == N->getOperand(i))
+      return true;
+  return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+  for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+    if (this == N->OperandList[i].getNode())
+      return true;
+  return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions.  In practice, this looks through token
+/// factors and non-volatile loads.  In order to remain efficient, this only
+/// looks a couple of nodes in; it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+                                               unsigned Depth) const {
+  if (*this == Dest) return true;
+
+  // Don't search too deeply, we just want to be able to see through
+  // TokenFactor's etc.
+  if (Depth == 0) return false;
+
+  // If this is a token factor, all inputs to the TF happen in parallel.  If any
+  // of the operands of the TF reach dest, then we can do the xform.
+  if (getOpcode() == ISD::TokenFactor) {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+        return true;
+    return false;
+  }
+
+  // Loads don't have side effects, look through them.
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+    if (!Ld->isVolatile())
+      return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+  }
+  return false;
+}
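+
+// Editorial sketch (not part of this check-in, and assuming the header
+// supplies a default search depth): a combiner might verify that folding
+// two memory operations skips no side effects; 'StChain' and 'LdChain' are
+// hypothetical chain values:
+//   if (StChain.reachesChainWithoutSideEffects(LdChain)) {
+//     // Only token factors and non-volatile loads lie between them.
+//   }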
+
+/// isPredecessorOf - Return true if this node is a predecessor of N. This node
+/// is either an operand of N or it can be reached by traversing up the operands.
+/// NOTE: this is an expensive method. Use it carefully.
+bool SDNode::isPredecessorOf(SDNode *N) const {
+  SmallPtrSet<SDNode *, 32> Visited;
+  SmallVector<SDNode *, 16> Worklist;
+  Worklist.push_back(N);
+
+  do {
+    N = Worklist.pop_back_val();
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+      SDNode *Op = N->getOperand(i).getNode();
+      if (Op == this)
+        return true;
+      if (Visited.insert(Op))
+        Worklist.push_back(Op);
+    }
+  } while (!Worklist.empty());
+
+  return false;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+  assert(Num < NumOperands && "Invalid child # of SDNode!");
+  return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+  switch (getOpcode()) {
+  default:
+    if (getOpcode() < ISD::BUILTIN_OP_END)
+      return "<<Unknown DAG Node>>";
+    if (isMachineOpcode()) {
+      if (G)
+        if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+          if (getMachineOpcode() < TII->getNumOpcodes())
+            return TII->get(getMachineOpcode()).getName();
+      return "<<Unknown Machine Node>>";
+    }
+    if (G) {
+      const TargetLowering &TLI = G->getTargetLoweringInfo();
+      const char *Name = TLI.getTargetNodeName(getOpcode());
+      if (Name) return Name;
+      return "<<Unknown Target Node>>";
+    }
+    return "<<Unknown Node>>";
+
+#ifndef NDEBUG
+  case ISD::DELETED_NODE:
+    return "<<Deleted Node!>>";
+#endif
+  case ISD::PREFETCH:      return "Prefetch";
+  case ISD::MEMBARRIER:    return "MemBarrier";
+  case ISD::ATOMIC_CMP_SWAP:    return "AtomicCmpSwap";
+  case ISD::ATOMIC_SWAP:        return "AtomicSwap";
+  case ISD::ATOMIC_LOAD_ADD:    return "AtomicLoadAdd";
+  case ISD::ATOMIC_LOAD_SUB:    return "AtomicLoadSub";
+  case ISD::ATOMIC_LOAD_AND:    return "AtomicLoadAnd";
+  case ISD::ATOMIC_LOAD_OR:     return "AtomicLoadOr";
+  case ISD::ATOMIC_LOAD_XOR:    return "AtomicLoadXor";
+  case ISD::ATOMIC_LOAD_NAND:   return "AtomicLoadNand";
+  case ISD::ATOMIC_LOAD_MIN:    return "AtomicLoadMin";
+  case ISD::ATOMIC_LOAD_MAX:    return "AtomicLoadMax";
+  case ISD::ATOMIC_LOAD_UMIN:   return "AtomicLoadUMin";
+  case ISD::ATOMIC_LOAD_UMAX:   return "AtomicLoadUMax";
+  case ISD::PCMARKER:      return "PCMarker";
+  case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+  case ISD::SRCVALUE:      return "SrcValue";
+  case ISD::EntryToken:    return "EntryToken";
+  case ISD::TokenFactor:   return "TokenFactor";
+  case ISD::AssertSext:    return "AssertSext";
+  case ISD::AssertZext:    return "AssertZext";
+
+  case ISD::BasicBlock:    return "BasicBlock";
+  case ISD::VALUETYPE:     return "ValueType";
+  case ISD::Register:      return "Register";
+
+  case ISD::Constant:      return "Constant";
+  case ISD::ConstantFP:    return "ConstantFP";
+  case ISD::GlobalAddress: return "GlobalAddress";
+  case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+  case ISD::FrameIndex:    return "FrameIndex";
+  case ISD::JumpTable:     return "JumpTable";
+  case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+  case ISD::RETURNADDR: return "RETURNADDR";
+  case ISD::FRAMEADDR: return "FRAMEADDR";
+  case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+  case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+  case ISD::LSDAADDR: return "LSDAADDR";
+  case ISD::EHSELECTION: return "EHSELECTION";
+  case ISD::EH_RETURN: return "EH_RETURN";
+  case ISD::ConstantPool:  return "ConstantPool";
+  case ISD::ExternalSymbol: return "ExternalSymbol";
+  case ISD::BlockAddress:  return "BlockAddress";
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+    unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+    if (IID < Intrinsic::num_intrinsics)
+      return Intrinsic::getName((Intrinsic::ID)IID);
+    else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+      return TII->getName(IID);
+    llvm_unreachable("Invalid intrinsic ID");
+  }
+
+  case ISD::BUILD_VECTOR:   return "BUILD_VECTOR";
+  case ISD::TargetConstant: return "TargetConstant";
+  case ISD::TargetConstantFP:return "TargetConstantFP";
+  case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+  case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+  case ISD::TargetFrameIndex: return "TargetFrameIndex";
+  case ISD::TargetJumpTable:  return "TargetJumpTable";
+  case ISD::TargetConstantPool:  return "TargetConstantPool";
+  case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+  case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+  case ISD::CopyToReg:     return "CopyToReg";
+  case ISD::CopyFromReg:   return "CopyFromReg";
+  case ISD::UNDEF:         return "undef";
+  case ISD::MERGE_VALUES:  return "merge_values";
+  case ISD::INLINEASM:     return "inlineasm";
+  case ISD::EH_LABEL:      return "eh_label";
+  case ISD::HANDLENODE:    return "handlenode";
+
+  // Unary operators
+  case ISD::FABS:   return "fabs";
+  case ISD::FNEG:   return "fneg";
+  case ISD::FSQRT:  return "fsqrt";
+  case ISD::FSIN:   return "fsin";
+  case ISD::FCOS:   return "fcos";
+  case ISD::FPOWI:  return "fpowi";
+  case ISD::FPOW:   return "fpow";
+  case ISD::FTRUNC: return "ftrunc";
+  case ISD::FFLOOR: return "ffloor";
+  case ISD::FCEIL:  return "fceil";
+  case ISD::FRINT:  return "frint";
+  case ISD::FNEARBYINT: return "fnearbyint";
+
+  // Binary operators
+  case ISD::ADD:    return "add";
+  case ISD::SUB:    return "sub";
+  case ISD::MUL:    return "mul";
+  case ISD::MULHU:  return "mulhu";
+  case ISD::MULHS:  return "mulhs";
+  case ISD::SDIV:   return "sdiv";
+  case ISD::UDIV:   return "udiv";
+  case ISD::SREM:   return "srem";
+  case ISD::UREM:   return "urem";
+  case ISD::SMUL_LOHI:  return "smul_lohi";
+  case ISD::UMUL_LOHI:  return "umul_lohi";
+  case ISD::SDIVREM:    return "sdivrem";
+  case ISD::UDIVREM:    return "udivrem";
+  case ISD::AND:    return "and";
+  case ISD::OR:     return "or";
+  case ISD::XOR:    return "xor";
+  case ISD::SHL:    return "shl";
+  case ISD::SRA:    return "sra";
+  case ISD::SRL:    return "srl";
+  case ISD::ROTL:   return "rotl";
+  case ISD::ROTR:   return "rotr";
+  case ISD::FADD:   return "fadd";
+  case ISD::FSUB:   return "fsub";
+  case ISD::FMUL:   return "fmul";
+  case ISD::FDIV:   return "fdiv";
+  case ISD::FREM:   return "frem";
+  case ISD::FCOPYSIGN: return "fcopysign";
+  case ISD::FGETSIGN:  return "fgetsign";
+
+  case ISD::SETCC:       return "setcc";
+  case ISD::VSETCC:      return "vsetcc";
+  case ISD::SELECT:      return "select";
+  case ISD::SELECT_CC:   return "select_cc";
+  case ISD::INSERT_VECTOR_ELT:   return "insert_vector_elt";
+  case ISD::EXTRACT_VECTOR_ELT:  return "extract_vector_elt";
+  case ISD::CONCAT_VECTORS:      return "concat_vectors";
+  case ISD::EXTRACT_SUBVECTOR:   return "extract_subvector";
+  case ISD::SCALAR_TO_VECTOR:    return "scalar_to_vector";
+  case ISD::VECTOR_SHUFFLE:      return "vector_shuffle";
+  case ISD::CARRY_FALSE:         return "carry_false";
+  case ISD::ADDC:        return "addc";
+  case ISD::ADDE:        return "adde";
+  case ISD::SADDO:       return "saddo";
+  case ISD::UADDO:       return "uaddo";
+  case ISD::SSUBO:       return "ssubo";
+  case ISD::USUBO:       return "usubo";
+  case ISD::SMULO:       return "smulo";
+  case ISD::UMULO:       return "umulo";
+  case ISD::SUBC:        return "subc";
+  case ISD::SUBE:        return "sube";
+  case ISD::SHL_PARTS:   return "shl_parts";
+  case ISD::SRA_PARTS:   return "sra_parts";
+  case ISD::SRL_PARTS:   return "srl_parts";
+
+  // Conversion operators.
+  case ISD::SIGN_EXTEND: return "sign_extend";
+  case ISD::ZERO_EXTEND: return "zero_extend";
+  case ISD::ANY_EXTEND:  return "any_extend";
+  case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+  case ISD::TRUNCATE:    return "truncate";
+  case ISD::FP_ROUND:    return "fp_round";
+  case ISD::FLT_ROUNDS_: return "flt_rounds";
+  case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+  case ISD::FP_EXTEND:   return "fp_extend";
+
+  case ISD::SINT_TO_FP:  return "sint_to_fp";
+  case ISD::UINT_TO_FP:  return "uint_to_fp";
+  case ISD::FP_TO_SINT:  return "fp_to_sint";
+  case ISD::FP_TO_UINT:  return "fp_to_uint";
+  case ISD::BIT_CONVERT: return "bit_convert";
+
+  case ISD::CONVERT_RNDSAT: {
+    switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+    default: llvm_unreachable("Unknown cvt code!");
+    case ISD::CVT_FF:  return "cvt_ff";
+    case ISD::CVT_FS:  return "cvt_fs";
+    case ISD::CVT_FU:  return "cvt_fu";
+    case ISD::CVT_SF:  return "cvt_sf";
+    case ISD::CVT_UF:  return "cvt_uf";
+    case ISD::CVT_SS:  return "cvt_ss";
+    case ISD::CVT_SU:  return "cvt_su";
+    case ISD::CVT_US:  return "cvt_us";
+    case ISD::CVT_UU:  return "cvt_uu";
+    }
+  }
+
+    // Control flow instructions
+  case ISD::BR:      return "br";
+  case ISD::BRIND:   return "brind";
+  case ISD::BR_JT:   return "br_jt";
+  case ISD::BRCOND:  return "brcond";
+  case ISD::BR_CC:   return "br_cc";
+  case ISD::CALLSEQ_START:  return "callseq_start";
+  case ISD::CALLSEQ_END:    return "callseq_end";
+
+    // Other operators
+  case ISD::LOAD:               return "load";
+  case ISD::STORE:              return "store";
+  case ISD::VAARG:              return "vaarg";
+  case ISD::VACOPY:             return "vacopy";
+  case ISD::VAEND:              return "vaend";
+  case ISD::VASTART:            return "vastart";
+  case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+  case ISD::EXTRACT_ELEMENT:    return "extract_element";
+  case ISD::BUILD_PAIR:         return "build_pair";
+  case ISD::STACKSAVE:          return "stacksave";
+  case ISD::STACKRESTORE:       return "stackrestore";
+  case ISD::TRAP:               return "trap";
+
+  // Bit manipulation
+  case ISD::BSWAP:   return "bswap";
+  case ISD::CTPOP:   return "ctpop";
+  case ISD::CTTZ:    return "cttz";
+  case ISD::CTLZ:    return "ctlz";
+
+  // Trampolines
+  case ISD::TRAMPOLINE: return "trampoline";
+
+  case ISD::CONDCODE:
+    switch (cast<CondCodeSDNode>(this)->get()) {
+    default: llvm_unreachable("Unknown setcc condition!");
+    case ISD::SETOEQ:  return "setoeq";
+    case ISD::SETOGT:  return "setogt";
+    case ISD::SETOGE:  return "setoge";
+    case ISD::SETOLT:  return "setolt";
+    case ISD::SETOLE:  return "setole";
+    case ISD::SETONE:  return "setone";
+
+    case ISD::SETO:    return "seto";
+    case ISD::SETUO:   return "setuo";
+    case ISD::SETUEQ:  return "setueq";
+    case ISD::SETUGT:  return "setugt";
+    case ISD::SETUGE:  return "setuge";
+    case ISD::SETULT:  return "setult";
+    case ISD::SETULE:  return "setule";
+    case ISD::SETUNE:  return "setune";
+
+    case ISD::SETEQ:   return "seteq";
+    case ISD::SETGT:   return "setgt";
+    case ISD::SETGE:   return "setge";
+    case ISD::SETLT:   return "setlt";
+    case ISD::SETLE:   return "setle";
+    case ISD::SETNE:   return "setne";
+    }
+  }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+  switch (AM) {
+  default:
+    return "";
+  case ISD::PRE_INC:
+    return "<pre-inc>";
+  case ISD::PRE_DEC:
+    return "<pre-dec>";
+  case ISD::POST_INC:
+    return "<post-inc>";
+  case ISD::POST_DEC:
+    return "<post-dec>";
+  }
+}
+
+std::string ISD::ArgFlagsTy::getArgFlagsString() {
+  std::string S = "< ";
+
+  if (isZExt())
+    S += "zext ";
+  if (isSExt())
+    S += "sext ";
+  if (isInReg())
+    S += "inreg ";
+  if (isSRet())
+    S += "sret ";
+  if (isByVal())
+    S += "byval ";
+  if (isNest())
+    S += "nest ";
+  if (getByValAlign())
+    S += "byval-align:" + utostr(getByValAlign()) + " ";
+  if (getOrigAlign())
+    S += "orig-align:" + utostr(getOrigAlign()) + " ";
+  if (getByValSize())
+    S += "byval-size:" + utostr(getByValSize()) + " ";
+  return S + ">";
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+  print(dbgs(), G);
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+  OS << (void*)this << ": ";
+
+  for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+    if (i) OS << ",";
+    if (getValueType(i) == MVT::Other)
+      OS << "ch";
+    else
+      OS << getValueType(i).getEVTString();
+  }
+  OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+  if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+    if (!MN->memoperands_empty()) {
+      OS << "<";
+      OS << "Mem:";
+      for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+           e = MN->memoperands_end(); i != e; ++i) {
+        OS << **i;
+        if (next(i) != e)
+          OS << " ";
+      }
+      OS << ">";
+    }
+  } else if (const ShuffleVectorSDNode *SVN =
+               dyn_cast<ShuffleVectorSDNode>(this)) {
+    OS << "<";
+    for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (i) OS << ",";
+      if (Idx < 0)
+        OS << "u";
+      else
+        OS << Idx;
+    }
+    OS << ">";
+  } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+    OS << '<' << CSDN->getAPIntValue() << '>';
+  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+    if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+      OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+    else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+      OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+    else {
+      OS << "<APFloat(";
+      CSDN->getValueAPF().bitcastToAPInt().dump();
+      OS << ")>";
+    }
+  } else if (const GlobalAddressSDNode *GADN =
+             dyn_cast<GlobalAddressSDNode>(this)) {
+    int64_t offset = GADN->getOffset();
+    OS << '<';
+    WriteAsOperand(OS, GADN->getGlobal());
+    OS << '>';
+    if (offset > 0)
+      OS << " + " << offset;
+    else
+      OS << " " << offset;
+    if (unsigned int TF = GADN->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+    OS << "<" << FIDN->getIndex() << ">";
+  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+    OS << "<" << JTDN->getIndex() << ">";
+    if (unsigned int TF = JTDN->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+    int offset = CP->getOffset();
+    if (CP->isMachineConstantPoolEntry())
+      OS << "<" << *CP->getMachineCPVal() << ">";
+    else
+      OS << "<" << *CP->getConstVal() << ">";
+    if (offset > 0)
+      OS << " + " << offset;
+    else
+      OS << " " << offset;
+    if (unsigned int TF = CP->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+    OS << "<";
+    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+    if (LBB)
+      OS << LBB->getName() << " ";
+    OS << (const void*)BBDN->getBasicBlock() << ">";
+  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+    if (G && R->getReg() &&
+        TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+      OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg());
+    } else {
+      OS << " %reg" << R->getReg();
+    }
+  } else if (const ExternalSymbolSDNode *ES =
+             dyn_cast<ExternalSymbolSDNode>(this)) {
+    OS << "'" << ES->getSymbol() << "'";
+    if (unsigned int TF = ES->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+    if (M->getValue())
+      OS << "<" << M->getValue() << ">";
+    else
+      OS << "<null>";
+  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+    OS << ":" << N->getVT().getEVTString();
+  }
+  else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+    OS << "<" << *LD->getMemOperand();
+
+    bool doExt = true;
+    switch (LD->getExtensionType()) {
+    default: doExt = false; break;
+    case ISD::EXTLOAD: OS << ", anyext"; break;
+    case ISD::SEXTLOAD: OS << ", sext"; break;
+    case ISD::ZEXTLOAD: OS << ", zext"; break;
+    }
+    if (doExt)
+      OS << " from " << LD->getMemoryVT().getEVTString();
+
+    const char *AM = getIndexedModeName(LD->getAddressingMode());
+    if (*AM)
+      OS << ", " << AM;
+
+    OS << ">";
+  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+    OS << "<" << *ST->getMemOperand();
+
+    if (ST->isTruncatingStore())
+      OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+    const char *AM = getIndexedModeName(ST->getAddressingMode());
+    if (*AM)
+      OS << ", " << AM;
+    
+    OS << ">";
+  } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+    OS << "<" << *M->getMemOperand() << ">";
+  } else if (const BlockAddressSDNode *BA =
+               dyn_cast<BlockAddressSDNode>(this)) {
+    OS << "<";
+    WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+    OS << ", ";
+    WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+    OS << ">";
+    if (unsigned int TF = BA->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  }
+
+  if (G)
+    if (unsigned Order = G->GetOrdering(this))
+      OS << " [ORD=" << Order << ']';
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+  print_types(OS, G);
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    if (i) OS << ", "; else OS << " ";
+    OS << (void*)getOperand(i).getNode();
+    if (unsigned RN = getOperand(i).getResNo())
+      OS << ":" << RN;
+  }
+  print_details(OS, G);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+                                  const SelectionDAG *G, unsigned depth,
+                                  unsigned indent) {
+  if (depth == 0)
+    return;
+
+  OS.indent(indent);
+
+  N->print(OS, G);
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    OS << '\n';
+    printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+  }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+                            unsigned depth) const {
+  printrWithDepthHelper(OS, this, G, depth, 0);
+} 
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+  // Don't print impossibly deep things.
+  printrWithDepth(OS, G, 100);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+  printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+  // Don't print impossibly deep things.
+  dumprWithDepth(G, 100);
+} 
+
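+/// DumpNodes - Recursively dump N and any operands that are used only by it;
+/// multiply-used operands are listed by address alone, since they are dumped
+/// in full where they are rooted.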
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (N->getOperand(i).getNode()->hasOneUse())
+      DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+    else
+      dbgs() << "\n" << std::string(indent+2, ' ')
+           << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+
+  dbgs() << "\n";
+  dbgs().indent(indent);
+  N->dump(G);
+}
+
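+/// UnrollVectorOp - Unroll a vector operation into ResNE scalar operations
+/// (or into as many elements as the result type holds, if ResNE is 0) by
+/// extracting each element with EXTRACT_VECTOR_ELT, applying the scalar
+/// opcode, and reassembling the results with BUILD_VECTOR; any extra result
+/// elements are filled with UNDEF.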
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+  assert(N->getNumValues() == 1 &&
+         "Can't unroll a vector with multiple results!");
+
+  EVT VT = N->getValueType(0);
+  unsigned NE = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+
+  SmallVector<SDValue, 8> Scalars;
+  SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+  // If ResNE is 0, fully unroll the vector op.
+  if (ResNE == 0)
+    ResNE = NE;
+  else if (NE > ResNE)
+    NE = ResNE;
+
+  unsigned i;
+  for (i = 0; i != NE; ++i) {
+    for (unsigned j = 0; j != N->getNumOperands(); ++j) {
+      SDValue Operand = N->getOperand(j);
+      EVT OperandVT = Operand.getValueType();
+      if (OperandVT.isVector()) {
+        // A vector operand; extract a single element.
+        EVT OperandEltVT = OperandVT.getVectorElementType();
+        Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                              OperandEltVT,
+                              Operand,
+                              getConstant(i, MVT::i32));
+      } else {
+        // A scalar operand; just use it as is.
+        Operands[j] = Operand;
+      }
+    }
+
+    switch (N->getOpcode()) {
+    default:
+      Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+                                &Operands[0], Operands.size()));
+      break;
+    case ISD::SHL:
+    case ISD::SRA:
+    case ISD::SRL:
+    case ISD::ROTL:
+    case ISD::ROTR:
+      Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+                                getShiftAmountOperand(Operands[1])));
+      break;
+    case ISD::SIGN_EXTEND_INREG:
+    case ISD::FP_ROUND_INREG: {
+      EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+      Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+                                Operands[0],
+                                getValueType(ExtVT)));
+    }
+    }
+  }
+
+  for (; i < ResNE; ++i)
+    Scalars.push_back(getUNDEF(EltVT));
+
+  return getNode(ISD::BUILD_VECTOR, dl,
+                 EVT::getVectorVT(*getContext(), EltVT, ResNE),
+                 &Scalars[0], Scalars.size());
+}
+
+
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
+/// location that is 'Dist' units away from the location that the 'Base' load 
+/// is loading from.
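+/// For example, with Bytes == 4 and Dist == 1, this returns true when LD
+/// loads from an address 4 bytes past the address that Base loads from.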
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, 
+                                     unsigned Bytes, int Dist) const {
+  if (LD->getChain() != Base->getChain())
+    return false;
+  EVT VT = LD->getValueType(0);
+  if (VT.getSizeInBits() / 8 != Bytes)
+    return false;
+
+  SDValue Loc = LD->getOperand(1);
+  SDValue BaseLoc = Base->getOperand(1);
+  if (Loc.getOpcode() == ISD::FrameIndex) {
+    if (BaseLoc.getOpcode() != ISD::FrameIndex)
+      return false;
+    const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FS  = MFI->getObjectSize(FI);
+    int BFS = MFI->getObjectSize(BFI);
+    if (FS != BFS || FS != (int)Bytes) return false;
+    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+  }
+  if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+    if (V && (V->getSExtValue() == Dist*Bytes))
+      return true;
+  }
+
+  GlobalValue *GV1 = NULL;
+  GlobalValue *GV2 = NULL;
+  int64_t Offset1 = 0;
+  int64_t Offset2 = 0;
+  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+  if (isGA1 && isGA2 && GV1 == GV2)
+    return Offset1 == (Offset2 + Dist*Bytes);
+  return false;
+}
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+  // If this is a GlobalAddress + cst, return the alignment.
+  GlobalValue *GV;
+  int64_t GVOffset = 0;
+  if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
+    return MinAlign(GV->getAlignment(), GVOffset);
+
+  // If this is a direct reference to a stack slot, use information about the
+  // stack slot's alignment.
+  int FrameIdx = 1 << 31;
+  int64_t FrameOffset = 0;
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+    FrameIdx = FI->getIndex();
+  } else if (Ptr.getOpcode() == ISD::ADD &&
+             isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+             isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+    FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+    FrameOffset = Ptr.getConstantOperandVal(1);
+  }
+
+  if (FrameIdx != (1 << 31)) {
+    // FIXME: Handle FI+CST.
+    const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+    unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+                                    FrameOffset);
+    if (MFI.isFixedObjectIndex(FrameIdx)) {
+      int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
+
+      // The alignment of the frame index can be determined from its offset from
+      // the incoming frame position.  If the frame object is at offset 32 and
+      // the stack is guaranteed to be 16-byte aligned, then we know that the
+      // object is 16-byte aligned.
+      unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment();
+      unsigned Align = MinAlign(ObjectOffset, StackAlign);
+
+      // Finally, the frame object itself may have a known alignment.  Factor
+      // the alignment + offset into a new alignment.  For example, if we know
+      // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
+      // 4-byte alignment of the resultant pointer.  Likewise align 4 + 4-byte
+      // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
+      return std::max(Align, FIInfoAlign);
+    }
+    return FIInfoAlign;
+  }
+
+  return 0;
+}
+
+void SelectionDAG::dump() const {
+  dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+  for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+       I != E; ++I) {
+    const SDNode *N = I;
+    if (!N->hasOneUse() && N != getRoot().getNode())
+      DumpNodes(N, 2, this);
+  }
+
+  if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+
+  dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+  print_types(OS, G);
+  print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+                       const SelectionDAG *G, VisitedSDNodeSet &once) {
+  if (!once.insert(N))          // If we've been here before, return now.
+    return;
+
+  // Dump the current SDNode, but don't end the line yet.
+  OS << std::string(indent, ' ');
+  N->printr(OS, G);
+
+  // Having printed this SDNode, walk the children:
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDNode *child = N->getOperand(i).getNode();
+
+    if (i) OS << ",";
+    OS << " ";
+
+    if (child->getNumOperands() == 0) {
+      // This child has no grandchildren; print it inline right here.
+      child->printr(OS, G);
+      once.insert(child);
+    } else {         // Just the address. FIXME: also print the child's opcode.
+      OS << (void*)child;
+      if (unsigned RN = N->getOperand(i).getResNo())
+        OS << ":" << RN;
+    }
+  }
+
+  OS << "\n";
+
+  // Dump children that have grandchildren on their own line(s).
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDNode *child = N->getOperand(i).getNode();
+    DumpNodesr(OS, child, indent+2, G, once);
+  }
+}
+
+void SDNode::dumpr() const {
+  VisitedSDNodeSet once;
+  DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+  VisitedSDNodeSet once;
+  DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+  return getGlobal()->getType()->getAddressSpace();
+}
+
+
+const Type *ConstantPoolSDNode::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+                                        APInt &SplatUndef,
+                                        unsigned &SplatBitSize,
+                                        bool &HasAnyUndefs,
+                                        unsigned MinSplatBits,
+                                        bool isBigEndian) {
+  EVT VT = getValueType(0);
+  assert(VT.isVector() && "Expected a vector type");
+  unsigned sz = VT.getSizeInBits();
+  if (MinSplatBits > sz)
+    return false;
+
+  SplatValue = APInt(sz, 0);
+  SplatUndef = APInt(sz, 0);
+
+  // Get the bits.  Bits with undefined values (when the corresponding element
+  // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+  // in SplatValue.  If any of the values are not constant, give up and return
+  // false.
+  unsigned int nOps = getNumOperands();
+  assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+  unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+
+  for (unsigned j = 0; j < nOps; ++j) {
+    unsigned i = isBigEndian ? nOps-1-j : j;
+    SDValue OpVal = getOperand(i);
+    unsigned BitPos = j * EltBitSize;
+
+    if (OpVal.getOpcode() == ISD::UNDEF)
+      SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+    else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+      SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+                     zextOrTrunc(sz) << BitPos);
+    else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+      SplatValue |= (CN->getValueAPF().bitcastToAPInt().
+                     zextOrTrunc(sz) << BitPos);
+    else
+      return false;
+  }
+
+  // The build_vector is all constants or undefs.  Find the smallest element
+  // size that splats the vector.
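+  // This is done by repeatedly halving the candidate: whenever the high and
+  // low halves agree (treating undef bits as wildcards), the splat can be
+  // described in half as many bits.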
+
+  HasAnyUndefs = (SplatUndef != 0);
+  while (sz > 8) {
+
+    unsigned HalfSize = sz / 2;
+    APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize);
+    APInt LowValue = APInt(SplatValue).trunc(HalfSize);
+    APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize);
+    APInt LowUndef = APInt(SplatUndef).trunc(HalfSize);
+
+    // If the two halves do not match (ignoring undef bits), stop here.
+    if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+        MinSplatBits > HalfSize)
+      break;
+
+    SplatValue = HighValue | LowValue;
+    SplatUndef = HighUndef & LowUndef;
+
+    sz = HalfSize;
+  }
+
+  SplatBitSize = sz;
+  return true;
+}
+
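+/// isSplatMask - Return true if every non-undef entry of the shuffle mask
+/// selects the same source element.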
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
+  // Find the first non-undef value in the shuffle mask.
+  unsigned i, e;
+  for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+    /* search */;
+
+  assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+  // Make sure all remaining elements are either undef or the same as the first
+  // non-undef value.
+  for (int Idx = Mask[i]; i != e; ++i)
+    if (Mask[i] >= 0 && Mask[i] != Idx)
+      return false;
+  return true;
+}
+
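+/// checkForCyclesHelper - Walk N's operands depth-first, keeping the current
+/// path in 'visited'; encountering a node that is already on the path means
+/// the graph contains a cycle.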
+static void checkForCyclesHelper(const SDNode *N,
+                                 std::set<const SDNode *> &visited) {
+  if (visited.find(N) != visited.end()) {
+    dbgs() << "Offending node:\n";
+    N->dumprFull();
+    assert(0 && "Detected cycle in SelectionDAG");
+  }
+
+  std::set<const SDNode*>::iterator i;
+  bool inserted;
+
+  tie(i, inserted) = visited.insert(N);
+  assert(inserted && "Missed cycle");
+
+  for (unsigned OpNo = 0, e = N->getNumOperands(); OpNo != e; ++OpNo)
+    checkForCyclesHelper(N->getOperand(OpNo).getNode(), visited);
+  visited.erase(i);
+}
+
+void llvm::checkForCycles(const llvm::SDNode *N) {
+#ifdef XDEBUG
+  assert(N && "Checking nonexistant SDNode");
+  std::set<const SDNode *> visited;
+  checkForCyclesHelper(N, visited);
+#endif
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
+  checkForCycles(DAG->getRoot().getNode());
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 0000000..de17f90
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,6164 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+                 cl::desc("Generate low-precision inline sequences "
+                          "for some float libcalls"),
+                 cl::location(LimitFloatPrecision),
+                 cl::init(0));
+
+namespace {
+  /// RegsForValue - This struct represents the registers (physical or virtual)
+  /// that a particular set of values is assigned, and the type information
+  /// about the value. The most common situation is to represent one value at a
+  /// time, but struct or array values are handled element-wise as multiple
+  /// values.  The splitting of aggregates is performed recursively, so that we
+  /// never have aggregate-typed registers. The values at this point do not
+  /// necessarily have legal types, so each value may require one or more
+  /// registers of some legal type.
+  ///
+  struct RegsForValue {
+    /// TLI - The TargetLowering object.
+    ///
+    const TargetLowering *TLI;
+
+    /// ValueVTs - The value types of the values, which may not be legal, and
+    /// may need to be promoted or synthesized from one or more registers.
+    ///
+    SmallVector<EVT, 4> ValueVTs;
+
+    /// RegVTs - The value types of the registers. This is the same size as
+    /// ValueVTs and it records, for each value, what the type of the assigned
+    /// register or registers are. (Individual values are never synthesized
+    /// from more than one type of register.)
+    ///
+    /// With virtual registers, the contents of RegVTs are redundant with
+    /// TLI's getRegisterType member function; with physical registers,
+    /// however, a separate record of the types is necessary.
+    ///
+    SmallVector<EVT, 4> RegVTs;
+
+    /// Regs - This list holds the registers assigned to the values.
+    /// Each legal or promoted value requires one register, and each
+    /// expanded value requires multiple registers.
+    ///
+    SmallVector<unsigned, 4> Regs;
+
+    RegsForValue() : TLI(0) {}
+
+    RegsForValue(const TargetLowering &tli,
+                 const SmallVector<unsigned, 4> &regs,
+                 EVT regvt, EVT valuevt)
+      : TLI(&tli),  ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+    RegsForValue(const TargetLowering &tli,
+                 const SmallVector<unsigned, 4> &regs,
+                 const SmallVector<EVT, 4> &regvts,
+                 const SmallVector<EVT, 4> &valuevts)
+      : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
+    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+                 unsigned Reg, const Type *Ty) : TLI(&tli) {
+      ComputeValueVTs(tli, Ty, ValueVTs);
+
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        EVT ValueVT = ValueVTs[Value];
+        unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
+        EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
+        for (unsigned i = 0; i != NumRegs; ++i)
+          Regs.push_back(Reg + i);
+        RegVTs.push_back(RegisterVT);
+        Reg += NumRegs;
+      }
+    }
+
+    /// areValueTypesLegal - Return true if the types of the registers
+    /// assigned to the values are all legal.
+    bool areValueTypesLegal() {
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        EVT RegisterVT = RegVTs[Value];
+        if (!TLI->isTypeLegal(RegisterVT))
+          return false;
+      }
+      return true;
+    }
+
+
+    /// append - Add the specified values to this one.
+    void append(const RegsForValue &RHS) {
+      TLI = RHS.TLI;
+      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    }
+
+
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+    /// this value and returns the result as a ValueVTs value.  This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+                            SDValue &Chain, SDValue *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+    /// specified value into the registers specified by this object.  This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                       unsigned Order, SDValue &Chain, SDValue *Flag) const;
+
+    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+    /// operand list.  This adds the code marker, matching input operand index
+    /// (if applicable), and includes the number of values added into it.
+    void AddInlineAsmOperands(unsigned Code,
+                              bool HasMatching, unsigned MatchingIdx,
+                              SelectionDAG &DAG, unsigned Order,
+                              std::vector<SDValue> &Ops) const;
+  };
+}
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent.  If the parts combine to a type
+/// larger than ValueVT, then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+                                const SDValue *Parts,
+                                unsigned NumParts, EVT PartVT, EVT ValueVT,
+                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+  assert(NumParts > 0 && "No parts to assemble!");
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Val = Parts[0];
+
+  if (NumParts > 1) {
+    // Assemble the value from multiple parts.
+    if (!ValueVT.isVector() && ValueVT.isInteger()) {
+      unsigned PartBits = PartVT.getSizeInBits();
+      unsigned ValueBits = ValueVT.getSizeInBits();
+
+      // Assemble the power of 2 part.
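+      // NumParts & (NumParts - 1) is nonzero exactly when NumParts is not a
+      // power of 2, in which case round down (e.g. 3 parts -> RoundParts = 2)
+      // and assemble the odd tail separately below.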
+      unsigned RoundParts = NumParts & (NumParts - 1) ?
+        1 << Log2_32(NumParts) : NumParts;
+      unsigned RoundBits = PartBits * RoundParts;
+      EVT RoundVT = RoundBits == ValueBits ?
+        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+      SDValue Lo, Hi;
+
+      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+      if (RoundParts > 2) {
+        Lo = getCopyFromParts(DAG, dl, Order, Parts, RoundParts / 2,
+                              PartVT, HalfVT);
+        Hi = getCopyFromParts(DAG, dl, Order, Parts + RoundParts / 2,
+                              RoundParts / 2, PartVT, HalfVT);
+      } else {
+        Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
+        Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
+      }
+
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+
+      Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
+
+      if (RoundParts < NumParts) {
+        // Assemble the trailing non-power-of-2 part.
+        unsigned OddParts = NumParts - RoundParts;
+        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+        Hi = getCopyFromParts(DAG, dl, Order,
+                              Parts + RoundParts, OddParts, PartVT, OddVT);
+
+        // Combine the round and odd parts.
+        Lo = Val;
+        if (TLI.isBigEndian())
+          std::swap(Lo, Hi);
+        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+        Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
+        Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
+                         DAG.getConstant(Lo.getValueType().getSizeInBits(),
+                                         TLI.getPointerTy()));
+        Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
+        Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
+      }
+    } else if (ValueVT.isVector()) {
+      // Handle a multi-element vector.
+      EVT IntermediateVT, RegisterVT;
+      unsigned NumIntermediates;
+      unsigned NumRegs =
+        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                   NumIntermediates, RegisterVT);
+      assert(NumRegs == NumParts
+             && "Part count doesn't match vector breakdown!");
+      NumParts = NumRegs; // Silence a compiler warning.
+      assert(RegisterVT == PartVT
+             && "Part type doesn't match vector breakdown!");
+      assert(RegisterVT == Parts[0].getValueType() &&
+             "Part type doesn't match part!");
+
+      // Assemble the parts into intermediate operands.
+      SmallVector<SDValue, 8> Ops(NumIntermediates);
+      if (NumIntermediates == NumParts) {
+        // If the register was not expanded, truncate or copy the value,
+        // as appropriate.
+        for (unsigned i = 0; i != NumParts; ++i)
+          Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i], 1,
+                                    PartVT, IntermediateVT);
+      } else if (NumParts > 0) {
+        // If the intermediate type was expanded, build the intermediate
+        // operands from the parts.
+        assert(NumParts % NumIntermediates == 0 &&
+               "Must expand into a divisible number of parts!");
+        unsigned Factor = NumParts / NumIntermediates;
+        for (unsigned i = 0; i != NumIntermediates; ++i)
+          Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i * Factor], Factor,
+                                    PartVT, IntermediateVT);
+      }
+
+      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+      // intermediate operands.
+      Val = DAG.getNode(IntermediateVT.isVector() ?
+                        ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
+                        ValueVT, &Ops[0], NumIntermediates);
+    } else if (PartVT.isFloatingPoint()) {
+      // FP split into multiple FP parts (for ppcf128)
+      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
+             "Unexpected split");
+      SDValue Lo, Hi;
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+    } else {
+      // FP split into integer parts (soft fp)
+      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+             !PartVT.isVector() && "Unexpected split");
+      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+      Val = getCopyFromParts(DAG, dl, Order, Parts, NumParts, PartVT, IntVT);
+    }
+  }
+
+  // There is now one part, held in Val.  Correct it to match ValueVT.
+  PartVT = Val.getValueType();
+
+  if (PartVT == ValueVT)
+    return Val;
+
+  if (PartVT.isVector()) {
+    assert(ValueVT.isVector() && "Unknown vector conversion!");
+    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+  }
+
+  if (ValueVT.isVector()) {
+    assert(ValueVT.getVectorElementType() == PartVT &&
+           ValueVT.getVectorNumElements() == 1 &&
+           "Only trivial scalar-to-vector conversions should get here!");
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
+  }
+
+  if (PartVT.isInteger() &&
+      ValueVT.isInteger()) {
+    if (ValueVT.bitsLT(PartVT)) {
+      // For a truncate, see if we have any information to
+      // indicate whether the truncated bits will always be
+      // zero or sign-extension.
+      if (AssertOp != ISD::DELETED_NODE)
+        Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+                          DAG.getValueType(ValueVT));
+      return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+    } else {
+      return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+    }
+  }
+
+  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+    if (ValueVT.bitsLT(Val.getValueType())) {
+      // FP_ROUND's are always exact here.
+      return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
+                         DAG.getIntPtrConstant(1));
+    }
+
+    return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+  }
+
+  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+
+  llvm_unreachable("Unknown mismatch!");
+  return SDValue();
+}
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts.  If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+                           SDValue Val, SDValue *Parts, unsigned NumParts,
+                           EVT PartVT,
+                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT PtrVT = TLI.getPointerTy();
+  EVT ValueVT = Val.getValueType();
+  unsigned PartBits = PartVT.getSizeInBits();
+  unsigned OrigNumParts = NumParts;
+  assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+
+  if (!NumParts)
+    return;
+
+  if (!ValueVT.isVector()) {
+    if (PartVT == ValueVT) {
+      assert(NumParts == 1 && "No-op copy with multiple parts!");
+      Parts[0] = Val;
+      return;
+    }
+
+    if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+      // If the parts cover more bits than the value has, promote the value.
+      if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+        assert(NumParts == 1 && "Do not know what to promote to!");
+        Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
+      } else if (PartVT.isInteger() && ValueVT.isInteger()) {
+        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+        Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
+      } else {
+        llvm_unreachable("Unknown mismatch!");
+      }
+    } else if (PartBits == ValueVT.getSizeInBits()) {
+      // Different types of the same size.
+      assert(NumParts == 1 && PartVT != ValueVT);
+      Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+    } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+      // If the parts cover fewer bits than the value has, truncate the value.
+      if (PartVT.isInteger() && ValueVT.isInteger()) {
+        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+        Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+      } else {
+        llvm_unreachable("Unknown mismatch!");
+      }
+    }
+
+    // The value may have changed - recompute ValueVT.
+    ValueVT = Val.getValueType();
+    assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+           "Failed to tile the value with PartVT!");
+
+    if (NumParts == 1) {
+      assert(PartVT == ValueVT && "Type conversion failed!");
+      Parts[0] = Val;
+      return;
+    }
+
+    // Expand the value into multiple parts.
+    if (NumParts & (NumParts - 1)) {
+      // The number of parts is not a power of 2.  Split off and copy the tail.
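+      // For example, with 3 parts: shift the top part out as the odd tail,
+      // then fall through to expand the remaining power-of-2 parts with the
+      // EXTRACT_ELEMENT loop below.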
+      assert(PartVT.isInteger() && ValueVT.isInteger() &&
+             "Do not know what to expand to!");
+      unsigned RoundParts = 1 << Log2_32(NumParts);
+      unsigned RoundBits = RoundParts * PartBits;
+      unsigned OddParts = NumParts - RoundParts;
+      SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
+                                   DAG.getConstant(RoundBits,
+                                                   TLI.getPointerTy()));
+      getCopyToParts(DAG, dl, Order, OddVal, Parts + RoundParts,
+                     OddParts, PartVT);
+
+      if (TLI.isBigEndian())
+        // The odd parts were reversed by getCopyToParts - unreverse them.
+        std::reverse(Parts + RoundParts, Parts + NumParts);
+
+      NumParts = RoundParts;
+      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+      Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+    }
+
+    // The number of parts is a power of 2.  Repeatedly bisect the value using
+    // EXTRACT_ELEMENT.
+    Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
+                           EVT::getIntegerVT(*DAG.getContext(),
+                                             ValueVT.getSizeInBits()),
+                           Val);
+
+    for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+      for (unsigned i = 0; i < NumParts; i += StepSize) {
+        unsigned ThisBits = StepSize * PartBits / 2;
+        EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+        SDValue &Part0 = Parts[i];
+        SDValue &Part1 = Parts[i+StepSize/2];
+
+        Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+                            ThisVT, Part0,
+                            DAG.getConstant(1, PtrVT));
+        Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+                            ThisVT, Part0,
+                            DAG.getConstant(0, PtrVT));
+
+        if (ThisBits == PartBits && ThisVT != PartVT) {
+          Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Part0);
+          Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Part1);
+        }
+      }
+    }
+
+    if (TLI.isBigEndian())
+      std::reverse(Parts, Parts + OrigNumParts);
+
+    return;
+  }
+
+  // Vector ValueVT.
+  if (NumParts == 1) {
+    if (PartVT != ValueVT) {
+      if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+        Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+      } else {
+        assert(ValueVT.getVectorElementType() == PartVT &&
+               ValueVT.getVectorNumElements() == 1 &&
+               "Only trivial vector-to-scalar conversions should get here!");
+        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                          PartVT, Val,
+                          DAG.getConstant(0, PtrVT));
+      }
+    }
+
+    Parts[0] = Val;
+    return;
+  }
+
+  // Handle a multi-element vector.
+  EVT IntermediateVT, RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+                              IntermediateVT, NumIntermediates, RegisterVT);
+  unsigned NumElements = ValueVT.getVectorNumElements();
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  NumParts = NumRegs; // Silence a compiler warning.
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+  // Split the vector into intermediate operands.
+  SmallVector<SDValue, 8> Ops(NumIntermediates);
+  for (unsigned i = 0; i != NumIntermediates; ++i) {
+    if (IntermediateVT.isVector())
+      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+                           IntermediateVT, Val,
+                           DAG.getConstant(i * (NumElements / NumIntermediates),
+                                           PtrVT));
+    else
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                           IntermediateVT, Val,
+                           DAG.getConstant(i, PtrVT));
+  }
+
+  // Split the intermediate operands into legal parts.
+  if (NumParts == NumIntermediates) {
+    // If the register was not expanded, promote or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i], 1, PartVT);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each value into
+    // legal parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i*Factor], Factor, PartVT);
+  }
+}
+
+
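+/// init - Prepare this SelectionDAGBuilder for a new function by recording
+/// the alias analysis, GC metadata, and target data it will use.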
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+  AA = &aa;
+  GFI = gfi;
+  TD = DAG.getTarget().getTargetData();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGBuilder object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGBuilder::clear() {
+  NodeMap.clear();
+  PendingLoads.clear();
+  PendingExports.clear();
+  EdgeMapping.clear();
+  DAG.clear();
+  CurDebugLoc = DebugLoc::getUnknownLoc();
+  HasTailCall = false;
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG,
+/// flushing any PendingLoad items. This must be done before emitting
+/// a store or any other node that may need to be ordered after any
+/// prior load instructions.
+///
+SDValue SelectionDAGBuilder::getRoot() {
+  if (PendingLoads.empty())
+    return DAG.getRoot();
+
+  if (PendingLoads.size() == 1) {
+    SDValue Root = PendingLoads[0];
+    DAG.setRoot(Root);
+    PendingLoads.clear();
+    return Root;
+  }
+
+  // Otherwise, we have to make a token factor node.
+  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+                               &PendingLoads[0], PendingLoads.size());
+  PendingLoads.clear();
+  DAG.setRoot(Root);
+  return Root;
+}
+
+/// getControlRoot - Similar to getRoot, but instead of flushing all the
+/// PendingLoad items, flush all the PendingExports items. It is necessary
+/// to do this before emitting a terminator instruction.
+///
+SDValue SelectionDAGBuilder::getControlRoot() {
+  SDValue Root = DAG.getRoot();
+
+  if (PendingExports.empty())
+    return Root;
+
+  // Turn all of the CopyToReg chains into one factored node.
+  if (Root.getOpcode() != ISD::EntryToken) {
+    unsigned i = 0, e = PendingExports.size();
+    for (; i != e; ++i) {
+      assert(PendingExports[i].getNode()->getNumOperands() > 1);
+      if (PendingExports[i].getNode()->getOperand(0) == Root)
+        break;  // Don't add the root if we already indirectly depend on it.
+    }
+
+    if (i == e)
+      PendingExports.push_back(Root);
+  }
+
+  Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+                     &PendingExports[0],
+                     PendingExports.size());
+  PendingExports.clear();
+  DAG.setRoot(Root);
+  return Root;
+}
+
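+/// AssignOrderingToNode - Give Node and its operands the ordering of the IR
+/// instruction currently being visited, so the original source order can be
+/// recovered later; nodes that already have an ordering are left alone.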
+void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
+  if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
+  DAG.AssignOrdering(Node, SDNodeOrder);
+
+  for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
+    AssignOrderingToNode(Node->getOperand(I).getNode());
+}
+
+void SelectionDAGBuilder::visit(Instruction &I) {
+  visit(I.getOpcode(), I);
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
+  // Note: this doesn't use InstVisitor, because it has to work with
+  // ConstantExpr's in addition to instructions.
+  switch (Opcode) {
+  default: llvm_unreachable("Unknown instruction type encountered!");
+    // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+    case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
+#include "llvm/Instruction.def"
+  }
+
+  // Assign the ordering to the freshly created DAG nodes.
+  if (NodeMap.count(&I)) {
+    ++SDNodeOrder;
+    AssignOrderingToNode(getValue(&I).getNode());
+  }
+}
+
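+/// getValue - Return an SDValue for the given Value, materializing constants
+/// on demand, mapping static allocas to their frame indices, and emitting
+/// CopyFromReg for values that live in virtual registers.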
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+  SDValue &N = NodeMap[V];
+  if (N.getNode()) return N;
+
+  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+    EVT VT = TLI.getValueType(V->getType(), true);
+
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+      return N = DAG.getConstant(*CI, VT);
+
+    if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+      return N = DAG.getGlobalAddress(GV, VT);
+
+    if (isa<ConstantPointerNull>(C))
+      return N = DAG.getConstant(0, TLI.getPointerTy());
+
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+      return N = DAG.getConstantFP(*CFP, VT);
+
+    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+      return N = DAG.getUNDEF(VT);
+
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      visit(CE->getOpcode(), *CE);
+      SDValue N1 = NodeMap[V];
+      assert(N1.getNode() && "visit didn't populate the ValueMap!");
+      return N1;
+    }
+
+    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+      SmallVector<SDValue, 4> Constants;
+      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+           OI != OE; ++OI) {
+        SDNode *Val = getValue(*OI).getNode();
+        // If the operand is an empty aggregate, there are no values.
+        if (!Val) continue;
+        // Add each leaf value from the operand to the Constants list
+        // to form a flattened list of all the values.
+        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+          Constants.push_back(SDValue(Val, i));
+      }
+
+      return DAG.getMergeValues(&Constants[0], Constants.size(),
+                                getCurDebugLoc());
+    }
+
+    if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
+      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+             "Unknown struct or array constant!");
+
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, C->getType(), ValueVTs);
+      unsigned NumElts = ValueVTs.size();
+      if (NumElts == 0)
+        return SDValue(); // empty struct
+      SmallVector<SDValue, 4> Constants(NumElts);
+      for (unsigned i = 0; i != NumElts; ++i) {
+        EVT EltVT = ValueVTs[i];
+        if (isa<UndefValue>(C))
+          Constants[i] = DAG.getUNDEF(EltVT);
+        else if (EltVT.isFloatingPoint())
+          Constants[i] = DAG.getConstantFP(0, EltVT);
+        else
+          Constants[i] = DAG.getConstant(0, EltVT);
+      }
+
+      return DAG.getMergeValues(&Constants[0], NumElts,
+                                getCurDebugLoc());
+    }
+
+    if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+      return DAG.getBlockAddress(BA, VT);
+
+    const VectorType *VecTy = cast<VectorType>(V->getType());
+    unsigned NumElements = VecTy->getNumElements();
+
+    // Now that we know the number and type of the elements, get that number of
+    // elements into the Ops array based on what kind of constant it is.
+    SmallVector<SDValue, 16> Ops;
+    if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+      for (unsigned i = 0; i != NumElements; ++i)
+        Ops.push_back(getValue(CP->getOperand(i)));
+    } else {
+      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+      EVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+      SDValue Op;
+      if (EltVT.isFloatingPoint())
+        Op = DAG.getConstantFP(0, EltVT);
+      else
+        Op = DAG.getConstant(0, EltVT);
+      Ops.assign(NumElements, Op);
+    }
+
+    // Create a BUILD_VECTOR node.
+    return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                                    VT, &Ops[0], Ops.size());
+  }
+
+  // If this is a static alloca, generate it as the frame index instead of
+  // an address computation.
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+    if (SI != FuncInfo.StaticAllocaMap.end())
+      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+  }
+
+  unsigned InReg = FuncInfo.ValueMap[V];
+  assert(InReg && "Value not in map!");
+
+  RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+  SDValue Chain = DAG.getEntryNode();
+  return RFV.getCopyFromRegs(DAG, getCurDebugLoc(),
+                             SDNodeOrder, Chain, NULL);
+}
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return 
+/// type of the given function.  This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+static void getReturnInfo(const Type* ReturnType,
+                          Attributes attr, SmallVectorImpl<EVT> &OutVTs,
+                          SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
+                          TargetLowering &TLI,
+                          SmallVectorImpl<uint64_t> *Offsets = 0) {
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, ReturnType, ValueVTs);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0) return;
+  unsigned Offset = 0;
+
+  for (unsigned j = 0, f = NumValues; j != f; ++j) {
+    EVT VT = ValueVTs[j];
+    ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+    if (attr & Attribute::SExt)
+      ExtendKind = ISD::SIGN_EXTEND;
+    else if (attr & Attribute::ZExt)
+      ExtendKind = ISD::ZERO_EXTEND;
+
+    // FIXME: C calling convention requires the return type to be promoted to
+    // at least 32-bit. But this is not necessary for non-C calling
+    // conventions. The frontend should mark functions whose return values
+    // require promoting with signext or zeroext attributes.
+    if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+      EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+      if (VT.bitsLT(MinVT))
+        VT = MinVT;
+    }
+
+    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+    EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+    unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
+                        PartVT.getTypeForEVT(ReturnType->getContext()));
+
+    // 'inreg' on function refers to return value
+    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+    if (attr & Attribute::InReg)
+      Flags.setInReg();
+
+    // Propagate extension type if any
+    if (attr & Attribute::SExt)
+      Flags.setSExt();
+    else if (attr & Attribute::ZExt)
+      Flags.setZExt();
+
+    for (unsigned i = 0; i < NumParts; ++i) {
+      OutVTs.push_back(PartVT);
+      OutFlags.push_back(Flags);
+      if (Offsets) {
+        Offsets->push_back(Offset);
+        Offset += PartSize;
+      }
+    }
+  }
+}
+
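+/// visitRet - Lower a return instruction.  When the target cannot lower the
+/// return value directly (sret demotion), the value is stored through the
+/// demote register instead; otherwise each returned value is split into
+/// legal parts and handed to the target's LowerReturn hook.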
+void SelectionDAGBuilder::visitRet(ReturnInst &I) {
+  SDValue Chain = getControlRoot();
+  SmallVector<ISD::OutputArg, 8> Outs;
+  FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+  if (!FLI.CanLowerReturn) {
+    unsigned DemoteReg = FLI.DemoteRegister;
+    const Function *F = I.getParent()->getParent();
+
+    // Emit a store of the return value through the virtual register.
+    // Leave Outs empty so that LowerReturn won't try to load return
+    // registers the usual way.
+    SmallVector<EVT, 1> PtrValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+                    PtrValueVTs);
+
+    SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+    SDValue RetOp = getValue(I.getOperand(0));
+
+    SmallVector<EVT, 4> ValueVTs;
+    SmallVector<uint64_t, 4> Offsets;
+    ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+    unsigned NumValues = ValueVTs.size();
+
+    SmallVector<SDValue, 4> Chains(NumValues);
+    EVT PtrVT = PtrValueVTs[0];
+    for (unsigned i = 0; i != NumValues; ++i) {
+      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
+                                DAG.getConstant(Offsets[i], PtrVT));
+      Chains[i] =
+        DAG.getStore(Chain, getCurDebugLoc(),
+                     SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+                     Add, NULL, Offsets[i], false, 0);
+    }
+
+    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                        MVT::Other, &Chains[0], NumValues);
+  } else {
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+      unsigned NumValues = ValueVTs.size();
+      if (NumValues == 0) continue;
+
+      SDValue RetOp = getValue(I.getOperand(i));
+      for (unsigned j = 0, f = NumValues; j != f; ++j) {
+        EVT VT = ValueVTs[j];
+
+        ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+        const Function *F = I.getParent()->getParent();
+        if (F->paramHasAttr(0, Attribute::SExt))
+          ExtendKind = ISD::SIGN_EXTEND;
+        else if (F->paramHasAttr(0, Attribute::ZExt))
+          ExtendKind = ISD::ZERO_EXTEND;
+
+        // FIXME: The C calling convention requires the return type to be
+        // promoted to at least 32 bits. But this is not necessary for non-C
+        // calling conventions. The frontend should mark functions whose return
+        // values require promoting with signext or zeroext attributes.
+        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+          EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
+          if (VT.bitsLT(MinVT))
+            VT = MinVT;
+        }
+
+        unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+        EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+        SmallVector<SDValue, 4> Parts(NumParts);
+        getCopyToParts(DAG, getCurDebugLoc(), SDNodeOrder,
+                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+                       &Parts[0], NumParts, PartVT, ExtendKind);
+
+        // The 'inreg' attribute on the function refers to its return value.
+        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+        if (F->paramHasAttr(0, Attribute::InReg))
+          Flags.setInReg();
+
+        // Propagate extension type if any
+        if (F->paramHasAttr(0, Attribute::SExt))
+          Flags.setSExt();
+        else if (F->paramHasAttr(0, Attribute::ZExt))
+          Flags.setZExt();
+
+        for (unsigned part = 0; part < NumParts; ++part)
+          Outs.push_back(ISD::OutputArg(Flags, Parts[part], /*isfixed=*/true));
+      }
+    }
+  }
+
+  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+  CallingConv::ID CallConv =
+    DAG.getMachineFunction().getFunction()->getCallingConv();
+  Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
+                          Outs, getCurDebugLoc(), DAG);
+
+  // Verify that the target's LowerReturn behaved as expected.
+  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+         "LowerReturn didn't return a valid chain!");
+
+  // Update the DAG with the new chain value resulting from return lowering.
+  DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) {
+  if (!V->use_empty()) {
+    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+    if (VMI != FuncInfo.ValueMap.end())
+      CopyValueToVirtualRegister(V, VMI->second);
+  }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
+  // No need to export constants.
+  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+  // Already exported?
+  if (FuncInfo.isExportedInst(V)) return;
+
+  unsigned Reg = FuncInfo.InitializeRegForValue(V);
+  CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V,
+                                                     const BasicBlock *FromBB) {
+  // The operands of the setcc have to be in this block.  We don't know
+  // how to export them from some other block.
+  if (Instruction *VI = dyn_cast<Instruction>(V)) {
+    // Can export from current BB.
+    if (VI->getParent() == FromBB)
+      return true;
+
+    // Is already exported, noop.
+    return FuncInfo.isExportedInst(V);
+  }
+
+  // If this is an argument, we can export it if the BB is the entry block or
+  // if it is already exported.
+  if (isa<Argument>(V)) {
+    if (FromBB == &FromBB->getParent()->getEntryBlock())
+      return true;
+
+    // Otherwise, can only export this if it is already exported.
+    return FuncInfo.isExportedInst(V);
+  }
+
+  // Otherwise, constants can always be exported.
+  return true;
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getParent() == BB;
+  return true;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code.  This includes
+/// consideration of global floating-point math flags.
+///
+static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
+  ISD::CondCode FPC, FOC;
+  switch (Pred) {
+  case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+  case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+  case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+  case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+  case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+  case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+  case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+  case FCmpInst::FCMP_ORD:   FOC = FPC = ISD::SETO;   break;
+  case FCmpInst::FCMP_UNO:   FOC = FPC = ISD::SETUO;  break;
+  case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+  case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+  case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+  case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+  case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+  case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+  case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
+  default:
+    llvm_unreachable("Invalid FCmp predicate opcode!");
+    FOC = FPC = ISD::SETFALSE;
+    break;
+  }
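+  // With finite-only FP math there are no NaNs to distinguish, so each
+  // ordered/unordered predicate pair collapses to one test and the plain
+  // codes (FOC) suffice; otherwise the NaN-aware codes (FPC) are required.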
+  if (FiniteOnlyFPMath())
+    return FOC;
+  else
+    return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
+  switch (Pred) {
+  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
+  case ICmpInst::ICMP_NE:  return ISD::SETNE;
+  case ICmpInst::ICMP_SLE: return ISD::SETLE;
+  case ICmpInst::ICMP_ULE: return ISD::SETULE;
+  case ICmpInst::ICMP_SGE: return ISD::SETGE;
+  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+  case ICmpInst::ICMP_SLT: return ISD::SETLT;
+  case ICmpInst::ICMP_ULT: return ISD::SETULT;
+  case ICmpInst::ICMP_SGT: return ISD::SETGT;
+  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+  default:
+    llvm_unreachable("Invalid ICmp predicate opcode!");
+    return ISD::SETNE;
+  }
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
+                                                  MachineBasicBlock *TBB,
+                                                  MachineBasicBlock *FBB,
+                                                  MachineBasicBlock *CurBB) {
+  const BasicBlock *BB = CurBB->getBasicBlock();
+
+  // If the leaf of the tree is a comparison, merge the condition into
+  // the caseblock.
+  if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+    // The operands of the cmp have to be in this block.  We don't know
+    // how to export them from some other block.  If this is the first block
+    // of the sequence, no exporting is needed.
+    if (CurBB == CurMBB ||
+        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+      ISD::CondCode Condition;
+      if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+        Condition = getICmpCondCode(IC->getPredicate());
+      } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+        Condition = getFCmpCondCode(FC->getPredicate());
+      } else {
+        Condition = ISD::SETEQ; // silence warning.
+        llvm_unreachable("Unknown compare instruction");
+      }
+
+      CaseBlock CB(Condition, BOp->getOperand(0),
+                   BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+      SwitchCases.push_back(CB);
+      return;
+    }
+  }
+
+  // Create a CaseBlock record representing this branch.
+  CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+               NULL, TBB, FBB, CurBB);
+  SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
+/// recursively decompose it and emit a tree of conditional branches, so that
+/// each leaf comparison becomes its own branch rather than materializing the
+/// combined boolean value.
+void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
+                                               MachineBasicBlock *TBB,
+                                               MachineBasicBlock *FBB,
+                                               MachineBasicBlock *CurBB,
+                                               unsigned Opc) {
+  // If this node is not part of the or/and tree, emit it as a branch.
+  Instruction *BOp = dyn_cast<Instruction>(Cond);
+  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
+    return;
+  }
+
+  //  Create TmpBB after CurBB.
+  MachineFunction::iterator BBI = CurBB;
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+  CurBB->getParent()->insert(++BBI, TmpBB);
+
+  if (Opc == Instruction::Or) {
+    // Codegen X | Y as:
+    //   jmp_if_X TBB
+    //   jmp TmpBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  } else {
+    assert(Opc == Instruction::And && "Unknown merge op!");
+    // Codegen X & Y as:
+    //   jmp_if_X TmpBB
+    //   jmp FBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+    //  This requires creation of TmpBB after CurBB.
+
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+  if (Cases.size() != 2) return true;
+
+  // If this is two comparisons of the same values or'd or and'd together, they
+  // will get folded into a single comparison, so don't emit two blocks.
+  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+    return false;
+  }
+
+  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+      Cases[0].CC == Cases[1].CC &&
+      isa<Constant>(Cases[0].CmpRHS) &&
+      cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+    if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+      return false;
+    if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+      return false;
+  }
+  
+  return true;
+}
+
+void SelectionDAGBuilder::visitBr(BranchInst &I) {
+  // Update machine-CFG edges.
+  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  if (I.isUnconditional()) {
+    // Update machine-CFG edges.
+    CurMBB->addSuccessor(Succ0MBB);
+
+    // If this is not a fall-through branch, emit the branch.
+    if (Succ0MBB != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(Succ0MBB)));
+
+    return;
+  }
+
+  // If this condition is one of the special cases we handle, do special stuff
+  // now.
+  Value *CondVal = I.getCondition();
+  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  // If this is a series of conditions that are or'd or and'd together, emit
+  // this as a sequence of branches instead of setcc's with and/or operations.
+  // For example, instead of something like:
+  //     cmp A, B
+  //     C = seteq
+  //     cmp D, E
+  //     F = setle
+  //     or C, F
+  //     jnz foo
+  // Emit:
+  //     cmp A, B
+  //     je foo
+  //     cmp D, E
+  //     jle foo
+  //
+  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+    if (BOp->hasOneUse() &&
+        (BOp->getOpcode() == Instruction::And ||
+         BOp->getOpcode() == Instruction::Or)) {
+      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+      // If the compares in later blocks need to use values not currently
+      // exported from this block, export them now.  This block should always
+      // be the first entry.
+      assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+
+      // Allow some cases to be rejected.
+      if (ShouldEmitAsBranches(SwitchCases)) {
+        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+        }
+
+        // Emit the branch for this block.
+        visitSwitchCase(SwitchCases[0]);
+        SwitchCases.erase(SwitchCases.begin());
+        return;
+      }
+
+      // Okay, we decided not to do this, remove any inserted MBB's and clear
+      // SwitchCases.
+      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+        FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+
+      SwitchCases.clear();
+    }
+  }
+
+  // Create a CaseBlock record representing this branch.
+  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+               NULL, Succ0MBB, Succ1MBB, CurMBB);
+
+  // Use visitSwitchCase to actually insert the fast branch sequence for this
+  // cond branch.
+  visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
+  SDValue Cond;
+  SDValue CondLHS = getValue(CB.CmpLHS);
+  DebugLoc dl = getCurDebugLoc();
+
+  // Build the setcc now.
+  if (CB.CmpMHS == NULL) {
+    // Fold "(X == true)" to X and "(X == false)" to !X to
+    // handle common cases produced by branch lowering.
+    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+        CB.CC == ISD::SETEQ)
+      Cond = CondLHS;
+    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+             CB.CC == ISD::SETEQ) {
+      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+    } else
+      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+  } else {
+    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+    const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+    SDValue CmpOp = getValue(CB.CmpMHS);
+    EVT VT = CmpOp.getValueType();
+
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+                          ISD::SETLE);
+    } else {
+      SDValue SUB = DAG.getNode(ISD::SUB, dl,
+                                VT, CmpOp, DAG.getConstant(Low, VT));
+      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+                          DAG.getConstant(High-Low, VT), ISD::SETULE);
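+      // In other words, the range test Low <= X <= High has been emitted as
+      // the single unsigned comparison (X - Low) <=u (High - Low); the
+      // subtraction is skipped above when Low is already the smallest
+      // representable value.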
+    }
+  }
+
+  // Update successor info
+  CurMBB->addSuccessor(CB.TrueBB);
+  CurMBB->addSuccessor(CB.FalseBB);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  // If the lhs block is the next block, invert the condition so that we can
+  // fall through to the lhs instead of the rhs block.
+  if (CB.TrueBB == NextBlock) {
+    std::swap(CB.TrueBB, CB.FalseBB);
+    SDValue True = DAG.getConstant(1, Cond.getValueType());
+    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+  }
+
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+                               MVT::Other, getControlRoot(), Cond,
+                               DAG.getBasicBlock(CB.TrueBB));
+
+  // If the branch was constant folded, fix up the CFG.
+  if (BrCond.getOpcode() == ISD::BR) {
+    CurMBB->removeSuccessor(CB.FalseBB);
+  } else {
+    // Otherwise, go ahead and insert the false branch.
+    if (BrCond == getControlRoot())
+      CurMBB->removeSuccessor(CB.TrueBB);
+
+    if (CB.FalseBB != NextBlock)
+      BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                           DAG.getBasicBlock(CB.FalseBB));
+  }
+
+  DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+  // Emit the code for the jump table
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  EVT PTy = TLI.getPointerTy();
+  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+                                     JT.Reg, PTy);
+  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+                                    MVT::Other, Index.getValue(1),
+                                    Table, Index);
+  DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits necessary code to produce index
+/// in the JumpTable from switch case.
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+                                               JumpTableHeader &JTH) {
+  // Subtract the lowest switch case value from the value being switched on and
+  // conditional branch to default mbb if the result is greater than the
+  // difference between smallest and largest cases.
+  SDValue SwitchOp = getValue(JTH.SValue);
+  EVT VT = SwitchOp.getValueType();
+  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+                            DAG.getConstant(JTH.First, VT));
+
+  // The SDNode we just created, which holds the value being switched on minus
+  // the smallest case value, needs to be copied to a virtual register so it
+  // can be used as an index into the jump table in a subsequent basic block.
+  // This value may be smaller or larger than the target's pointer type, and
+  // may therefore require extension or truncation.
+  SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
+
+  unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+                                    JumpTableReg, SwitchOp);
+  JT.Reg = JumpTableReg;
+
+  // Emit the range check for the jump table, and branch to the default block
+  // for the switch statement if the value being switched on exceeds the largest
+  // case in the switch.
+  SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+                             TLI.getSetCCResultType(Sub.getValueType()), Sub,
+                             DAG.getConstant(JTH.Last-JTH.First,VT),
+                             ISD::SETUGT);
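+  // For example, for cases covering the values 10..14 this emits roughly:
+  //   idx = X - 10
+  //   if (idx >u 4) goto default
+  // and the jump table block then branches through the table on idx.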
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                               MVT::Other, CopyTo, CMP,
+                               DAG.getBasicBlock(JT.Default));
+
+  if (JT.MBB != NextBlock)
+    BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+                         DAG.getBasicBlock(JT.MBB));
+
+  DAG.setRoot(BrCond);
+}
+
+/// visitBitTestHeader - This function emits necessary code to produce value
+/// suitable for "bit tests"
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
+  // Subtract the minimum value
+  SDValue SwitchOp = getValue(B.SValue);
+  EVT VT = SwitchOp.getValueType();
+  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+                            DAG.getConstant(B.First, VT));
+
+  // Check range
+  SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+                                  TLI.getSetCCResultType(Sub.getValueType()),
+                                  Sub, DAG.getConstant(B.Range, VT),
+                                  ISD::SETUGT);
+
+  SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
+                                       TLI.getPointerTy());
+
+  B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+                                    B.Reg, ShiftOp);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+  CurMBB->addSuccessor(B.Default);
+  CurMBB->addSuccessor(MBB);
+
+  SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                                MVT::Other, CopyTo, RangeCmp,
+                                DAG.getBasicBlock(B.Default));
+
+  // Chain the unconditional branch after the range check so the BRCOND above
+  // stays reachable from the DAG root.
+  if (MBB != NextBlock)
+    BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrRange,
+                          DAG.getBasicBlock(MBB));
+
+  DAG.setRoot(BrRange);
+}
+
+/// visitBitTestCase - this function produces one "bit test"
+void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+                                           unsigned Reg,
+                                           BitTestCase &B) {
+  // Make desired shift
+  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
+                                       TLI.getPointerTy());
+  SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+                                  TLI.getPointerTy(),
+                                  DAG.getConstant(1, TLI.getPointerTy()),
+                                  ShiftOp);
+
+  // Emit bit tests and jumps
+  SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                              TLI.getPointerTy(), SwitchVal,
+                              DAG.getConstant(B.Mask, TLI.getPointerTy()));
+  SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
+                                TLI.getSetCCResultType(AndOp.getValueType()),
+                                AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+                                ISD::SETNE);
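+  // For example, if this BitTestCase covers the values {0, 3, 5} relative to
+  // the low bound, B.Mask is 0b101001 and the test below branches on
+  // ((1 << X) & Mask) != 0.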
+
+  CurMBB->addSuccessor(B.TargetBB);
+  CurMBB->addSuccessor(NextMBB);
+
+  SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  if (NextMBB != NextBlock)
+    BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+                        DAG.getBasicBlock(NextMBB));
+
+  DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
+  // Retrieve successors.
+  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  const Value *Callee(I.getCalledValue());
+  if (isa<InlineAsm>(Callee))
+    visitInlineAsm(&I);
+  else
+    LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+  // If the value of the invoke is used outside of its defining block, make it
+  // available as a virtual register.
+  CopyToExportRegsIfNeeded(&I);
+
+  // Update successor info
+  CurMBB->addSuccessor(Return);
+  CurMBB->addSuccessor(LandingPad);
+
+  // Drop into normal successor.
+  DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                          MVT::Other, getControlRoot(),
+                          DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitUnwind(UnwindInst &I) {
+  // Unwind edges are rewritten by the exception handling preparation passes
+  // before instruction selection, so there is nothing to emit here.
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+                                                 CaseRecVector& WorkList,
+                                                 Value* SV,
+                                                 MachineBasicBlock* Default) {
+  Case& BackCase  = *(CR.Range.second-1);
+
+  // Size is the number of Cases represented by this range.
+  size_t Size = CR.Range.second - CR.Range.first;
+  if (Size > 3)
+    return false;
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  // TODO: If any two of the cases has the same destination, and if one value
+  // is the same as the other, but has one bit unset that the other has set,
+  // use bit manipulation to do two compares at once.  For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+
+  // Rearrange the case blocks so that the last one falls through if possible.
+  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+    // The last case block won't fall through into 'NextBlock' if we emit the
+    // branches in this order.  See if rearranging a case value would help.
+    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+      if (I->BB == NextBlock) {
+        std::swap(*I, BackCase);
+        break;
+      }
+    }
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the Case's target mbb if the value being switched on SV is equal
+  // to C.
+  MachineBasicBlock *CurBlock = CR.CaseBB;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    MachineBasicBlock *FallThrough;
+    if (I != E-1) {
+      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+      CurMF->insert(BBI, FallThrough);
+
+      // Put SV in a virtual register to make it available from the new blocks.
+      ExportFromCurrentBlock(SV);
+    } else {
+      // If the last case doesn't match, go to the default block.
+      FallThrough = Default;
+    }
+
+    Value *RHS, *LHS, *MHS;
+    ISD::CondCode CC;
+    if (I->High == I->Low) {
+      // This is just a small case range containing exactly one case.
+      CC = ISD::SETEQ;
+      LHS = SV; RHS = I->High; MHS = NULL;
+    } else {
+      CC = ISD::SETLE;
+      LHS = I->Low; MHS = SV; RHS = I->High;
+    }
+    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+    // If emitting the first comparison, just call visitSwitchCase to emit the
+    // code into the current block.  Otherwise, push the CaseBlock onto the
+    // vector to be later processed by SDISel, and insert the node's MBB
+    // before the next MBB.
+    if (CurBlock == CurMBB)
+      visitSwitchCase(CB);
+    else
+      SwitchCases.push_back(CB);
+
+    CurBlock = FallThrough;
+  }
+
+  return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+  return !DisableJumpTables &&
+          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+  APInt LastExt(Last), FirstExt(First);
+  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+  LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+  return (LastExt - FirstExt + 1ULL);
+}
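+// For example, ComputeRange(-2, 3) sign-extends both values one bit wider so
+// the subtraction cannot overflow, and returns 6, the number of values in the
+// inclusive range [-2, 3].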
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
+                                             CaseRecVector& WorkList,
+                                             Value* SV,
+                                             MachineBasicBlock* Default) {
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
+
+  APInt TSize(First.getBitWidth(), 0);
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
+    return false;
+
+  APInt Range = ComputeRange(First, Last);
+  double Density = TSize.roundToDouble() / Range.roundToDouble();
+  if (Density < 0.4)
+    return false;
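+  // For example, 5 cases spanning the values 10..19 give a density of
+  // 5/10 = 0.5 >= 0.4, so a 10-entry table (half of it branching to the
+  // default block) is considered worthwhile.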
+
+  DEBUG(dbgs() << "Lowering jump table\n"
+               << "First entry: " << First << ". Last entry: " << Last << '\n'
+               << "Range: " << Range << ". "
+               << "Size: " << TSize << ". Density: " << Density << "\n\n");
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Create a new basic block to hold the code for loading the address
+  // of the jump table, and jumping to it.  Update successor information;
+  // we will either branch to the default case for the switch, or the jump
+  // table.
+  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+  CurMF->insert(BBI, JumpTableBB);
+  CR.CaseBB->addSuccessor(Default);
+  CR.CaseBB->addSuccessor(JumpTableBB);
+
+  // Build a vector of destination BBs, corresponding to each target
+  // of the jump table. If the value of the jump table slot corresponds to
+  // a case statement, push the case's BB onto the vector, otherwise, push
+  // the default BB.
+  std::vector<MachineBasicBlock*> DestBBs;
+  APInt TEI = First;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+    const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
+    const APInt &High = cast<ConstantInt>(I->High)->getValue();
+
+    if (Low.sle(TEI) && TEI.sle(High)) {
+      DestBBs.push_back(I->BB);
+      if (TEI==High)
+        ++I;
+    } else {
+      DestBBs.push_back(Default);
+    }
+  }
+
+  // Update successor info. Add one edge to each unique successor.
+  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+         E = DestBBs.end(); I != E; ++I) {
+    if (!SuccsHandled[(*I)->getNumber()]) {
+      SuccsHandled[(*I)->getNumber()] = true;
+      JumpTableBB->addSuccessor(*I);
+    }
+  }
+
+  // Create a jump table index for this jump table, or return an existing
+  // one.
+  unsigned JTEncoding = TLI.getJumpTableEncoding();
+  unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
+                       ->getJumpTableIndex(DestBBs);
+
+  // Set the jump table information so that we can codegen it as a second
+  // MachineBasicBlock
+  JumpTable JT(-1U, JTI, JumpTableBB, Default);
+  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
+  if (CR.CaseBB == CurMBB)
+    visitJumpTableHeader(JT, JTH);
+
+  JTCases.push_back(JumpTableBlock(JTH, JT));
+
+  return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
+                                                  CaseRecVector& WorkList,
+                                                  Value* SV,
+                                                  MachineBasicBlock* Default) {
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+
+  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
+  double FMetric = 0;
+  CaseItr Pivot = CR.Range.first + Size/2;
+
+  // Select optimal pivot, maximizing sum density of LHS and RHS. This will
+  // (heuristically) allow us to emit JumpTable's later.
+  APInt TSize(First.getBitWidth(), 0);
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  APInt LSize = FrontCase.size();
+  APInt RSize = TSize-LSize;
+  DEBUG(dbgs() << "Selecting best pivot: \n"
+               << "First: " << First << ", Last: " << Last <<'\n'
+               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+       J!=E; ++I, ++J) {
+    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+    APInt Range = ComputeRange(LEnd, RBegin);
+    assert((Range - 2ULL).isNonNegative() &&
+           "Invalid case distance");
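+    // Clusters are sorted and disjoint, so RBegin is strictly greater than
+    // LEnd and the inclusive span [LEnd, RBegin] covers at least two values,
+    // which is what the assertion above checks.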
+    double LDensity = (double)LSize.roundToDouble() /
+                           (LEnd - First + 1ULL).roundToDouble();
+    double RDensity = (double)RSize.roundToDouble() /
+                           (Last - RBegin + 1ULL).roundToDouble();
+    double Metric = Range.logBase2()*(LDensity+RDensity);
+    // Should always split in some non-trivial place
+    DEBUG(dbgs() <<"=>Step\n"
+                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+                 << "LDensity: " << LDensity
+                 << ", RDensity: " << RDensity << '\n'
+                 << "Metric: " << Metric << '\n');
+    if (FMetric < Metric) {
+      Pivot = J;
+      FMetric = Metric;
+      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
+    }
+
+    LSize += J->size();
+    RSize -= J->size();
+  }
+  if (areJTsAllowed(TLI)) {
+    // If our case is dense we *really* should handle it earlier!
+    assert((FMetric > 0) && "Should handle dense range earlier!");
+  } else {
+    Pivot = CR.Range.first + Size/2;
+  }
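+  // The metric log2(gap) * (LHS density + RHS density) favors split points
+  // that separate two dense clusters by a wide gap, since each dense half
+  // then makes a good jump table candidate on its own.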
+
+  CaseRange LHSR(CR.Range.first, Pivot);
+  CaseRange RHSR(Pivot, CR.Range.second);
+  Constant *C = Pivot->Low;
+  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+  // We know that we branch to the LHS if the Value being switched on is
+  // less than the Pivot value, C.  We use this to optimize our binary
+  // tree a bit, by recognizing that if SV is greater than or equal to the
+  // LHS's Case Value, and that Case Value is exactly one less than the
+  // Pivot's Value, then we can branch directly to the LHS's Target,
+  // rather than creating a leaf node for it.
+  if ((LHSR.second - LHSR.first) == 1 &&
+      LHSR.first->High == CR.GE &&
+      cast<ConstantInt>(C)->getValue() ==
+      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+    TrueBB = LHSR.first->BB;
+  } else {
+    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, TrueBB);
+    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Similar to the optimization above, if the Value being switched on is
+  // known to be less than the Constant CR.LT, and the current Case Value
+  // is CR.LT - 1, then we can branch directly to the target block for
+  // the current Case Value, rather than emitting a RHS leaf node for it.
+  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+    FalseBB = RHSR.first->BB;
+  } else {
+    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, FalseBB);
+    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to the RHS node.
+  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == CurMBB)
+    visitSwitchCase(CB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and spans fewer values than the machine word bitwidth, encode the case
+/// range into a series of masks and emit bit tests with these masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+                                                   CaseRecVector& WorkList,
+                                                   Value* SV,
+                                                   MachineBasicBlock* Default){
+  EVT PTy = TLI.getPointerTy();
+  unsigned IntPtrBits = PTy.getSizeInBits();
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // If target does not have legal shift left, do not emit bit tests at all.
+  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+    return false;
+
+  size_t numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I) {
+    // A single case counts as one comparison, a case range as two.
+    numCmps += (I->Low == I->High ? 1 : 2);
+  }
+
+  // Count unique destinations
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+      return false;
+  }
+  DEBUG(dbgs() << "Total number of unique destinations: "
+        << Dests.size() << '\n'
+        << "Total number of comparisons: " << numCmps << '\n');
+
+  // Compute span of values.
+  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+  APInt cmpRange = maxValue - minValue;
+
+  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
+               << "Low bound: " << minValue << '\n'
+               << "High bound: " << maxValue << '\n');
+
+  if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
+
+  DEBUG(dbgs() << "Emitting bit tests\n");
+  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // If all the case values fit in a machine word without subtracting
+  // minValue, we can avoid emitting the subtraction entirely.
+  if (minValue.isNonNegative() &&
+      maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+    cmpRange = maxValue;
+  } else {
+    lowBound = minValue;
+  }
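+  // For example, for cases {1, 3, 5} on a target with 64-bit words, minValue
+  // is non-negative and maxValue (5) is below 64, so lowBound stays at 0 and
+  // the switched value's bits are tested directly with no subtraction.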
+
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+
+    if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0));
+      count++;
+    }
+
+    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+    uint64_t lo = (lowValue - lowBound).getZExtValue();
+    uint64_t hi = (highValue - lowBound).getZExtValue();
+
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |=  1ULL << j;
+      CasesBits[i].Bits++;
+    }
+  }
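+  // For example, a case range [2, 4] branching to a single destination sets
+  // bits 2 through 4 in that destination's mask.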
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+  BitTestInfo BTC;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  DEBUG(dbgs() << "Cases:\n");
+  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+    DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
+                 << ", Bits: " << CasesBits[i].Bits
+                 << ", BB: " << CasesBits[i].BB << '\n');
+
+    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, CaseBB);
+    BTC.push_back(BitTestCase(CasesBits[i].Mask,
+                              CaseBB,
+                              CasesBits[i].BB));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  BitTestBlock BTB(lowBound, cmpRange, SV,
+                   -1U, (CR.CaseBB == CurMBB),
+                   CR.CaseBB, Default, BTC);
+
+  if (CR.CaseBB == CurMBB)
+    visitBitTestHeader(BTB);
+
+  BitTestCases.push_back(BTB);
+
+  return true;
+}
+
+/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+                                       const SwitchInst& SI) {
+  size_t numCmps = 0;
+
+  // Start with "simple" cases
+  for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    Cases.push_back(Case(SI.getSuccessorValue(i),
+                         SI.getSuccessorValue(i),
+                         SMBB));
+  }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters.
+  if (Cases.size() >= 2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it
+    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
+
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
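+// For example, cases 1 -> A, 2 -> A, 3 -> B are clusterified into [1,2] -> A
+// and [3,3] -> B, and numCmps is 3: two compares for the range plus one for
+// the singleton.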
+
+void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // If there is only the default destination, branch to it if it is not the
+  // next basic block.  Otherwise, just fall through.
+  if (SI.getNumOperands() == 2) {
+    // Update machine-CFG edges.
+
+    // If this is not a fall-through branch, emit the branch.
+    CurMBB->addSuccessor(Default);
+    if (Default != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(Default)));
+
+    return;
+  }
+
+  // If there are any non-default case statements, create a vector of Cases
+  // representing each one, and sort the vector so that we can efficiently
+  // create a binary search tree from them.
+  CaseVector Cases;
+  size_t numCmps = Clusterify(Cases, SI);
+  DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+               << ". Total compares: " << numCmps << '\n');
+
+  // Get the Value to be switched on and default basic blocks, which will be
+  // inserted into CaseBlock records, representing basic blocks in the binary
+  // search tree.
+  Value *SV = SI.getOperand(0);
+
+  // Push the initial CaseRec onto the worklist
+  CaseRecVector WorkList;
+  WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+
+  while (!WorkList.empty()) {
+    // Grab a record representing a case range to process off the worklist
+    CaseRec CR = WorkList.back();
+    WorkList.pop_back();
+
+    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+      continue;
+
+    // If the range has few cases (three or fewer), emit a series of specific
+    // tests.
+    if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+      continue;
+
+    // If the range has at least four cases, is at least 40% dense, and the
+    // target supports indirect branches, then emit a jump table rather than
+    // lowering the switch to a binary tree of conditional branches.
+    if (handleJTSwitchCase(CR, WorkList, SV, Default))
+      continue;
+
+    // Emit a binary tree. We need to pick a pivot and push the left and right
+    // ranges onto the worklist; leaves are handled via handleSmallSwitchRange.
+    handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+  }
+}
+
+void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
+  // Update machine-CFG edges.
+  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
+    CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
+
+  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+                          MVT::Other, getControlRoot(),
+                          getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitFSub(User &I) {
+  // -0.0 - X --> fneg
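+  // Only -0.0 qualifies: under IEEE arithmetic 0.0 - (+0.0) is +0.0 while
+  // fneg(+0.0) is -0.0, so 0.0 - X is not a negation in general.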
+  const Type *Ty = I.getType();
+  if (isa<VectorType>(Ty)) {
+    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+      const VectorType *DestTy = cast<VectorType>(I.getType());
+      const Type *ElTy = DestTy->getElementType();
+      unsigned VL = DestTy->getNumElements();
+      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
+      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+      if (CV == CNZ) {
+        SDValue Op2 = getValue(I.getOperand(1));
+        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                                 Op2.getValueType(), Op2));
+        return;
+      }
+    }
+  }
+
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+      SDValue Op2 = getValue(I.getOperand(1));
+      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                               Op2.getValueType(), Op2));
+      return;
+    }
+
+  visitBinary(I, ISD::FSUB);
+}
+
+void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+                           Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  if (!isa<VectorType>(I.getType()) &&
+      Op2.getValueType() != TLI.getShiftAmountTy()) {
+    // If the operand is smaller than the shift count type, promote it.
+    EVT PTy = TLI.getPointerTy();
+    EVT STy = TLI.getShiftAmountTy();
+    if (STy.bitsGT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+                        TLI.getShiftAmountTy(), Op2);
+    // If the operand is larger than the shift count type but the shift
+    // count type has enough bits to represent any shift value, truncate
+    // it now. This is a common case and it exposes the truncate to
+    // optimization early.
+    else if (STy.getSizeInBits() >=
+             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                        TLI.getShiftAmountTy(), Op2);
+    // Otherwise we'll need to temporarily settle for some other
+    // convenient type; type legalization will make adjustments as
+    // needed.
+    else if (PTy.bitsLT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                        TLI.getPointerTy(), Op2);
+    else if (PTy.bitsGT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+                        TLI.getPointerTy(), Op2);
+  }
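+  // As an example of the truncation case above, an i128 shift needs at most
+  // 7 bits of shift amount (Log2_32_Ceil(128) == 7), so even an i8 shift
+  // amount type can represent every valid count and truncating is safe.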
+
+  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+                           Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitICmp(User &I) {
+  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+  if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+    predicate = IC->getPredicate();
+  else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+    predicate = ICmpInst::Predicate(IC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(User &I) {
+  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+  if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+    predicate = FC->getPredicate();
+  else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+    predicate = FCmpInst::Predicate(FC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Condition = getFCmpCondCode(predicate);
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGBuilder::visitSelect(User &I) {
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0) return;
+
+  SmallVector<SDValue, 4> Values(NumValues);
+  SDValue Cond     = getValue(I.getOperand(0));
+  SDValue TrueVal  = getValue(I.getOperand(1));
+  SDValue FalseVal = getValue(I.getOperand(2));
+
+  for (unsigned i = 0; i != NumValues; ++i)
+    Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
+                            TrueVal.getNode()->getValueType(i), Cond,
+                            SDValue(TrueVal.getNode(),
+                                    TrueVal.getResNo() + i),
+                            SDValue(FalseVal.getNode(),
+                                    FalseVal.getResNo() + i));
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValueVTs[0], NumValues),
+                           &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitTrunc(User &I) {
+  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(User &I) {
+  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(User &I) {
+  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for same reason. So, nothing much to do
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(User &I) {
+  // FPTrunc is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+                           DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGBuilder::visitFPExt(User &I) {
+  // FPExt is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(User &I) {
+  // FPToUI is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(User &I) {
+  // FPToSI is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(User &I) {
+  // UIToFP is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(User &I) {
+  // SIToFP is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(User &I) {
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+
+  // BitCast assures us that source and destination are the same size so this is
+  // either a BIT_CONVERT or a no-op.
+  if (DestVT != N.getValueType())
+    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                             DestVT, N)); // convert types.
+  else
+    setValue(&I, N);            // noop cast.
+}
+
+void SelectionDAGBuilder::visitInsertElement(User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InVal = getValue(I.getOperand(1));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(2)));
+  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()),
+                           InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(1)));
+  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+// Utility for visitShuffleVector - Returns true if every defined mask element
+// equals SIndx plus its position, i.e. the mask selects a contiguous run of
+// elements starting at SIndx (undefs are allowed).
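+// For example, with SIndx == 0 the mask <0,1,-1,3> is sequential, while
+// <0,2,1,3> is not.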
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+  unsigned MaskNumElts = Mask.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i)
+    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+      return false;
+  return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(User &I) {
+  SmallVector<int, 8> Mask;
+  SDValue Src1 = getValue(I.getOperand(0));
+  SDValue Src2 = getValue(I.getOperand(1));
+
+  // Convert the ConstantVector mask operand into an array of ints, with -1
+  // representing undef values.
+  SmallVector<Constant*, 8> MaskElts;
+  cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+  unsigned MaskNumElts = MaskElts.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (isa<UndefValue>(MaskElts[i]))
+      Mask.push_back(-1);
+    else
+      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+  }
+
+  EVT VT = TLI.getValueType(I.getType());
+  EVT SrcVT = Src1.getValueType();
+  unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+  if (SrcNumElts == MaskNumElts) {
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &Mask[0]));
+    return;
+  }
+
+  // Normalize the shuffle vector since the mask and vector lengths don't
+  // match.
+  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+    // The mask is longer than the source vectors, and its length is a
+    // multiple of the source vector length.  We can use concatenation to
+    // make the mask and vector lengths match.
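+    // For example, shuffling two <2 x float> sources with the 4-element mask
+    // <0,1,2,3> is exactly a concatenation of the two sources.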
+    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+      // The shuffle is concatenating two vectors together.
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                               VT, Src1, Src2));
+      return;
+    }
+
+    // Pad both vectors with undefs to make them the same length as the mask.
+    unsigned NumConcat = MaskNumElts / SrcNumElts;
+    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+    SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+    MOps1[0] = Src1;
+    MOps2[0] = Src2;
+
+    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps1[0], NumConcat);
+    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps2[0], NumConcat);
+
+    // Readjust mask for new input vector length.
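+    // Elements taken from Src2 were numbered starting at SrcNumElts; after
+    // padding, Src2's elements begin at MaskNumElts instead.  For example,
+    // with SrcNumElts == 2 and MaskNumElts == 4, mask index 2 (the first
+    // element of Src2) is remapped to 4.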
+    SmallVector<int, 8> MappedOps;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        MappedOps.push_back(Idx);
+      else
+        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+    }
+
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &MappedOps[0]));
+    return;
+  }
+
+  if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle.  The analysis is done by calculating
+    // the range of elements the mask accesses on both vectors.
+    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+    int MaxRange[2] = {-1, -1};
+
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      int Input = 0;
+      if (Idx < 0)
+        continue;
+
+      if (Idx >= (int)SrcNumElts) {
+        Input = 1;
+        Idx -= SrcNumElts;
+      }
+      if (Idx > MaxRange[Input])
+        MaxRange[Input] = Idx;
+      if (Idx < MinRange[Input])
+        MinRange[Input] = Idx;
+    }
+
+    // Check if the access is smaller than the vector size and whether we can
+    // find a reasonable extract index.
+    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Cannot
+                                 // extract.
+    int StartIdx[2];  // StartIdx to extract from
+    for (int Input=0; Input < 2; ++Input) {
+      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+        RangeUse[Input] = 0; // Unused
+        StartIdx[Input] = 0;
+      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+        // Fits within range but we should see if we can find a good
+        // start index that is a multiple of the mask length.
+        if (MaxRange[Input] < (int)MaskNumElts) {
+          RangeUse[Input] = 1; // Extract from beginning of the vector
+          StartIdx[Input] = 0;
+        } else {
+          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+              StartIdx[Input] + MaskNumElts < SrcNumElts)
+            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+        }
+      }
+    }
+
+    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+      setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+      return;
+    } else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+      // Extract appropriate subvector and generate a vector shuffle
+      for (int Input=0; Input < 2; ++Input) {
+        SDValue &Src = Input == 0 ? Src1 : Src2;
+        if (RangeUse[Input] == 0)
+          Src = DAG.getUNDEF(VT);
+        else
+          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
+      }
+
+      // Calculate new mask.
+      SmallVector<int, 8> MappedOps;
+      for (unsigned i = 0; i != MaskNumElts; ++i) {
+        int Idx = Mask[i];
+        if (Idx < 0)
+          MappedOps.push_back(Idx);
+        else if (Idx < (int)SrcNumElts)
+          MappedOps.push_back(Idx - StartIdx[0]);
+        else
+          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+      }
+
+      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                        &MappedOps[0]));
+      return;
+    }
+  }
+
+  // We can't use either concat vectors or extract subvectors, so fall back
+  // to replacing the shuffle with individual extracts and a build vector.
+  EVT EltVT = VT.getVectorElementType();
+  EVT PtrVT = TLI.getPointerTy();
+  SmallVector<SDValue,8> Ops;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (Mask[i] < 0) {
+      Ops.push_back(DAG.getUNDEF(EltVT));
+    } else {
+      int Idx = Mask[i];
+      SDValue Res;
+
+      if (Idx < (int)SrcNumElts)
+        Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                          EltVT, Src1, DAG.getConstant(Idx, PtrVT));
+      else
+        Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                          EltVT, Src2,
+                          DAG.getConstant(Idx - SrcNumElts, PtrVT));
+
+      Ops.push_back(Res);
+    }
+  }
+
+  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                           VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  const Value *Op1 = I.getOperand(1);
+  const Type *AggTy = I.getType();
+  const Type *ValTy = Op1->getType();
+  bool IntoUndef = isa<UndefValue>(Op0);
+  bool FromUndef = isa<UndefValue>(Op1);
+
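+  // ComputeLinearIndex flattens the aggregate type into its scalar leaves and
+  // returns the position of the leaf addressed by the index list; e.g. in
+  // {i32, {float, double}} the index path {1,1} addresses leaf number 2.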
+  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+                                            I.idx_begin(), I.idx_end());
+
+  SmallVector<EVT, 4> AggValueVTs;
+  ComputeValueVTs(TLI, AggTy, AggValueVTs);
+  SmallVector<EVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumAggValues = AggValueVTs.size();
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumAggValues);
+
+  SDValue Agg = getValue(Op0);
+  SDValue Val = getValue(Op1);
+  unsigned i = 0;
+  // Copy the beginning value(s) from the original aggregate.
+  for (; i != LinearIndex; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+  // Copy values from the inserted value(s).
+  for (; i != LinearIndex + NumValValues; ++i)
+    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  // Copy remaining value(s) from the original aggregate.
+  for (; i != NumAggValues; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
+                           &Values[0], NumAggValues));
+}
+
+void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  const Type *AggTy = Op0->getType();
+  const Type *ValTy = I.getType();
+  bool OutOfUndef = isa<UndefValue>(Op0);
+
+  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+                                            I.idx_begin(), I.idx_end());
+
+  SmallVector<EVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumValValues);
+
+  SDValue Agg = getValue(Op0);
+  // Copy out the selected value(s).
+  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+    Values[i - LinearIndex] =
+      OutOfUndef ?
+        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+        SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValValueVTs[0], NumValValues),
+                           &Values[0], NumValValues));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(User &I) {
+  SDValue N = getValue(I.getOperand(0));
+  const Type *Ty = I.getOperand(0)->getType();
+
+  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+       OI != E; ++OI) {
+    Value *Idx = *OI;
+    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      if (Field) {
+        // N = N + Offset
+        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        DAG.getIntPtrConstant(Offset));
+      }
+
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+
+      // If this is a constant subscript, handle it quickly.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->getZExtValue() == 0) continue;
+        uint64_t Offs =
+            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+        SDValue OffsVal;
+        EVT PTy = TLI.getPointerTy();
+        unsigned PtrBits = PTy.getSizeInBits();
+        if (PtrBits < 64)
+          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                                TLI.getPointerTy(),
+                                DAG.getConstant(Offs, MVT::i64));
+        else
+          OffsVal = DAG.getIntPtrConstant(Offs);
+
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        OffsVal);
+        continue;
+      }
+
+      // N = N + Idx * ElementSize;
+      APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+                                TD->getTypeAllocSize(Ty));
+      SDValue IdxN = getValue(Idx);
+
+      // If the index is smaller or larger than intptr_t, truncate or extend
+      // it.
+      IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+
+      // If this is a multiply by a power of two, turn it into a shl
+      // immediately.  This is a very common case.
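+      // For example, indexing into an array of i64 gives ElementSize == 8,
+      // so the index is shifted left by 3 rather than multiplied.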
+      if (ElementSize != 1) {
+        if (ElementSize.isPowerOf2()) {
+          unsigned Amt = ElementSize.logBase2();
+          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+                             N.getValueType(), IdxN,
+                             DAG.getConstant(Amt, TLI.getPointerTy()));
+        } else {
+          SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
+          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+                             N.getValueType(), IdxN, Scale);
+        }
+      }
+
+      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                      N.getValueType(), N, IdxN);
+    }
+  }
+
+  setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
+  // If this is a fixed sized alloca in the entry block of the function,
+  // allocate it statically on the stack.
+  if (FuncInfo.StaticAllocaMap.count(&I))
+    return;   // getValue will auto-populate this.
+
+  const Type *Ty = I.getAllocatedType();
+  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+  unsigned Align =
+    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+             I.getAlignment());
+
+  SDValue AllocSize = getValue(I.getArraySize());
+
+  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
+                          AllocSize,
+                          DAG.getConstant(TySize, AllocSize.getValueType()));
+
+  EVT IntPtr = TLI.getPointerTy();
+  AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+  // Handle alignment.  If the requested alignment is less than or equal to
+  // the stack alignment, ignore it.  If the size is greater than or equal to
+  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+  unsigned StackAlign =
+    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (Align <= StackAlign)
+    Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size.
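+  // For example, with StackAlign == 16 this computes (Size + 15) & ~15,
+  // rounding a 20-byte allocation up to 32; the mask is applied just below.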
+  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(StackAlign-1));
+
+  // Mask out the low bits for alignment purposes.
+  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
+  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+                            VTs, Ops, 3);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  // Inform the Frame Information that we have just allocated a variable-sized
+  // object.
+  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGBuilder::visitLoad(LoadInst &I) {
+  const Value *SV = I.getOperand(0);
+  SDValue Ptr = getValue(SV);
+
+  const Type *Ty = I.getType();
+  bool isVolatile = I.isVolatile();
+  unsigned Alignment = I.getAlignment();
+
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+
+  SDValue Root;
+  bool ConstantMemory = false;
+  if (isVolatile)
+    // Serialize volatile loads with other side effects.
+    Root = getRoot();
+  else if (AA->pointsToConstantMemory(SV)) {
+    // Do not serialize (non-volatile) loads of constant memory with anything.
+    Root = DAG.getEntryNode();
+    ConstantMemory = true;
+  } else {
+    // Do not serialize non-volatile loads against each other.
+    Root = DAG.getRoot();
+  }
+
+  SmallVector<SDValue, 4> Values(NumValues);
+  SmallVector<SDValue, 4> Chains(NumValues);
+  EVT PtrVT = Ptr.getValueType();
+  for (unsigned i = 0; i != NumValues; ++i) {
+    SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                            PtrVT, Ptr,
+                            DAG.getConstant(Offsets[i], PtrVT));
+    SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+                            A, SV, Offsets[i], isVolatile, Alignment);
+
+    Values[i] = L;
+    Chains[i] = L.getValue(1);
+  }
+
+  if (!ConstantMemory) {
+    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                MVT::Other, &Chains[0], NumValues);
+    if (isVolatile)
+      DAG.setRoot(Chain);
+    else
+      PendingLoads.push_back(Chain);
+  }
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValueVTs[0], NumValues),
+                           &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitStore(StoreInst &I) {
+  Value *SrcV = I.getOperand(0);
+  Value *PtrV = I.getOperand(1);
+
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+
+  // Get the lowered operands.  Note that we do this after
+  // checking if NumValues is zero, because with zero values
+  // the operands won't have values in the map.
+  SDValue Src = getValue(SrcV);
+  SDValue Ptr = getValue(PtrV);
+
+  SDValue Root = getRoot();
+  SmallVector<SDValue, 4> Chains(NumValues);
+  EVT PtrVT = Ptr.getValueType();
+  bool isVolatile = I.isVolatile();
+  unsigned Alignment = I.getAlignment();
+
+  for (unsigned i = 0; i != NumValues; ++i) {
+    SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
+                              DAG.getConstant(Offsets[i], PtrVT));
+    Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
+                             SDValue(Src.getNode(), Src.getResNo() + i),
+                             Add, PtrV, Offsets[i], isVolatile, Alignment);
+  }
+
+  DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                          MVT::Other, &Chains[0], NumValues));
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
+                                               unsigned Intrinsic) {
+  bool HasChain = !I.doesNotAccessMemory();
+  bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+  // Build the operand list.
+  SmallVector<SDValue, 8> Ops;
+  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
+    if (OnlyLoad) {
+      // We don't need to serialize loads against other loads.
+      Ops.push_back(DAG.getRoot());
+    } else {
+      Ops.push_back(getRoot());
+    }
+  }
+
+  // Info is set by getTgtMemIntrinsic.
+  TargetLowering::IntrinsicInfo Info;
+  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+  if (!IsTgtIntrinsic)
+    Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+  // Add all operands of the call to the operand list.
+  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+    SDValue Op = getValue(I.getOperand(i));
+    assert(TLI.isTypeLegal(Op.getValueType()) &&
+           "Intrinsic uses a non-legal type?");
+    Ops.push_back(Op);
+  }
+
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+#ifndef NDEBUG
+  for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
+    assert(TLI.isTypeLegal(ValueVTs[Val]) &&
+           "Intrinsic uses a non-legal type?");
+  }
+#endif // NDEBUG
+
+  if (HasChain)
+    ValueVTs.push_back(MVT::Other);
+
+  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+  // Create the node.
+  SDValue Result;
+  if (IsTgtIntrinsic) {
+    // This is a target intrinsic that touches memory.
+    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+                                     VTs, &Ops[0], Ops.size(),
+                                     Info.memVT, Info.ptrVal, Info.offset,
+                                     Info.align, Info.vol,
+                                     Info.readMem, Info.writeMem);
+  } else if (!HasChain) {
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  } else if (!I.getType()->isVoidTy()) {
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  } else {
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  }
+
+  if (HasChain) {
+    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+
+  if (!I.getType()->isVoidTy()) {
+    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+      EVT VT = TLI.getValueType(PTy);
+      Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+    }
+
+    setValue(&I, Result);
+  }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with exponent of 1:
+///
+///   Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit pattern of the floating-point value.
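+/// For example, Op == 0x40490fdb (3.14159274f) yields 0x3fc90fdb, which is
+/// 1.57079637f.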
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) {
+  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x007fffff, MVT::i32));
+  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+                           DAG.getConstant(0x3f800000, MVT::i32));
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit pattern of the floating-point value.
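+/// For example, Op == 0x40490fdb (3.14159274f) has biased exponent 128, so
+/// the result is (float)(128 - 127) == 1.0f.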
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+            DebugLoc dl, unsigned Order) {
+  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x7f800000, MVT::i32));
+  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+                           DAG.getConstant(23, TLI.getPointerTy()));
+  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+                           DAG.getConstant(127, MVT::i32));
+  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get 32-bit floating point constant.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+  return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// Inlined utility function to implement binary input atomic intrinsics for
+/// visitIntrinsicCall: I is a call instruction and Op is the associated
+/// NodeType for I.
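+/// Operand 1 of I is the pointer operand; operand 2 is the value operand.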
+const char *
+SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+  SDValue Root = getRoot();
+  SDValue L =
+    DAG.getAtomic(Op, getCurDebugLoc(),
+                  getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+                  Root,
+                  getValue(I.getOperand(1)),
+                  getValue(I.getOperand(2)),
+                  I.getOperand(1));
+  setValue(&I, L);
+  DAG.setRoot(L.getValue(1));
+  return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+  SDValue Op1 = getValue(I.getOperand(1));
+  SDValue Op2 = getValue(I.getOperand(2));
+
+  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+  setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+  return 0;
+}
+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp(CallInst &I) {
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+
+  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getOperand(1));
+
+    // Put the exponent in the right bit position for later addition to the
+    // final result:
+    //
+    //   #define LOG2OFe 1.4426950f
+    //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+                             getF32Constant(DAG, 0x3fb8aa3b));
+    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+    //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+    //   IntegerPartOfX <<= 23;
+    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+                                 DAG.getConstant(23, TLI.getPointerTy()));
+
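+    // The shift puts IntegerPartOfX into the IEEE-754 exponent field, so
+    // adding it to the bit pattern of 2^FractionalPartOfX below multiplies
+    // that value by 2^IntegerPartOfX.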
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.997535578f +
+      //       (0.735607626f + 0.252464424f * x) * x;
+      //
+      // error 0.0144103317, which is 6 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3e814304));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f3c50c8));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f7f5e7e));
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+                                             t5);
+
+      // Add the exponent into the result in integer domain.
+      SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+                               TwoToFracPartOfX, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999892986f +
+      //       (0.696457318f +
+      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+      //
+      // 0.000107046256 error, which is 13 to 14 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3da235e3));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3e65b8f3));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f324b07));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3f7ff8fd));
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+                                             t7);
+
+      // Add the exponent into the result in integer domain.
+      SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+                               TwoToFracPartOfX, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999999982f +
+      //       (0.693148872f +
+      //         (0.240227044f +
+      //           (0.554906021e-1f +
+      //             (0.961591928e-2f +
+      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+      //
+      // error 2.47208000*10^(-7), which is better than 18 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3924b03e));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3ab24b87));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3c1d8c17));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3d634a1d));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x3e75fe14));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+                                getF32Constant(DAG, 0x3f317234));
+      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+                                getF32Constant(DAG, 0x3f800000));
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+                                             MVT::i32, t13);
+
+      // Add the exponent into the result in integer domain.
+      SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+                                TwoToFracPartOfX, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FEXP, dl,
+                         getValue(I.getOperand(1)).getValueType(),
+                         getValue(I.getOperand(1)));
+  }
+
+  setValue(&I, result);
+}
+
+/// visitLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog(CallInst &I) {
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+
+  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getOperand(1));
+    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+    // Scale the exponent by log(2) [0.69314718f].
+    SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+                                        getF32Constant(DAG, 0x3f317218));
+
+    // Get the significand and build it into a floating-point number with
+    // exponent of 1.
+    SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   LogofMantissa =
+      //     -1.1609546f +
+      //       (1.4034025f - 0.23903021f * x) * x;
+      //
+      // error 0.0034276066, which is better than 8 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbe74c456));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3fb3a2b1));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                                          getF32Constant(DAG, 0x3f949a29));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, LogOfMantissa);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   LogOfMantissa =
+      //     -1.7417939f +
+      //       (2.8212026f +
+      //         (-1.4699568f +
+      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+      //
+      // error 0.000061011436, which is 14 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbd67b6d6));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3ee4f4b8));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3fbc278b));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x40348e95));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+                                          getF32Constant(DAG, 0x3fdef31a));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, LogOfMantissa);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   LogOfMantissa =
+      //     -2.1072184f +
+      //       (4.2372794f +
+      //         (-3.7029485f +
+      //           (2.2781945f +
+      //             (-0.87823314f +
+      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+      //
+      // error 0.0000023660568, which is better than 18 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbc91e5ac));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3e4350aa));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f60d3e3));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x4011cdf0));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x406cfd1c));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x408797cb));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+                                          getF32Constant(DAG, 0x4006dcab));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, LogOfMantissa);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FLOG, dl,
+                         getValue(I.getOperand(1)).getValueType(),
+                         getValue(I.getOperand(1)));
+  }
+
+  setValue(&I, result);
+}
+
+/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog2(CallInst &I) {
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+
+  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getOperand(1));
+    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+    // Get the exponent.
+    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+
+    // Get the significand and build it into a floating-point number with
+    // exponent of 1.
+    SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+
+    // Different possible minimax approximations of significand in
+    // floating-point for various degrees of accuracy over [1,2].
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+      //
+      // error 0.0049451742, which is more than 7 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbeb08fe0));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x40019463));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                                           getF32Constant(DAG, 0x3fd6633d));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log2ofMantissa);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   Log2ofMantissa =
+      //     -2.51285454f +
+      //       (4.07009056f +
+      //         (-2.12067489f +
+      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+      //
+      // error 0.0000876136000, which is better than 13 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbda7262e));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3f25280b));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x4007b923));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x40823e2f));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+                                           getF32Constant(DAG, 0x4020d29c));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log2ofMantissa);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   Log2ofMantissa =
+      //     -3.0400495f +
+      //       (6.1129976f +
+      //         (-5.3420409f +
+      //           (3.2865683f +
+      //             (-1.2669343f +
+      //               (0.27515199f -
+      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+      //
+      // error 0.0000018516, which is better than 18 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbcd2769e));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3e8ce0b9));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3fa22ae7));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x40525723));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x40aaf200));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x40c39dad));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+                                           getF32Constant(DAG, 0x4042902c));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log2ofMantissa);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FLOG2, dl,
+                         getValue(I.getOperand(1)).getValueType(),
+                         getValue(I.getOperand(1)));
+  }
+
+  setValue(&I, result);
+}
+
+/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog10(CallInst &I) {
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+
+  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getOperand(1));
+    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+    // Scale the exponent by log10(2) [0.30102999f].
+    SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+                                        getF32Constant(DAG, 0x3e9a209a));
+
+    // Get the significand and build it into a floating-point number with
+    // exponent of 1.
+    SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   Log10ofMantissa =
+      //     -0.50419619f +
+      //       (0.60948995f - 0.10380950f * x) * x;
+      //
+      // error 0.0014886165, which is 6 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbdd49a13));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3f1c0789));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                                            getF32Constant(DAG, 0x3f011300));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log10ofMantissa);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   Log10ofMantissa =
+      //     -0.64831180f +
+      //       (0.91751397f +
+      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+      //
+      // error 0.00019228036, which is better than 12 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3d431f31));
+      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3ea21fb2));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f6ae232));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+                                            getF32Constant(DAG, 0x3f25f7c3));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log10ofMantissa);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   Log10ofMantissa =
+      //     -0.84299375f +
+      //       (1.5327582f +
+      //         (-1.0688956f +
+      //           (0.49102474f +
+      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+      //
+      // error 0.0000037995730, which is better than 18 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3c5d51ce));
+      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3e00685a));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3efb6798));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f88d192));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3fc4316c));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+                                            getF32Constant(DAG, 0x3f57ce70));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log10ofMantissa);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FLOG10, dl,
+                         getValue(I.getOperand(1)).getValueType(),
+                         getValue(I.getOperand(1)));
+  }
+
+  setValue(&I, result);
+}
+
+/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp2(CallInst &I) {
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+
+  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getOperand(1));
+
+    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
+
+    //   FractionalPartOfX = x - (float)IntegerPartOfX;
+    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
+
+    //   IntegerPartOfX <<= 23;
+    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+                                 DAG.getConstant(23, TLI.getPointerTy()));
+
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.997535578f +
+      //       (0.735607626f + 0.252464424f * x) * x;
+      //
+      // error 0.0144103317, which is 6 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3e814304));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f3c50c8));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f7f5e7e));
+      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999892986f +
+      //       (0.696457318f +
+      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+      //
+      // error 0.000107046256, which is 13 to 14 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3da235e3));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3e65b8f3));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f324b07));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3f7ff8fd));
+      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999999982f +
+      //       (0.693148872f +
+      //         (0.240227044f +
+      //           (0.554906021e-1f +
+      //             (0.961591928e-2f +
+      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+      // error 2.47208000*10^(-7), which is better than 18 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3924b03e));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3ab24b87));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3c1d8c17));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3d634a1d));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x3e75fe14));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+                                getF32Constant(DAG, 0x3f317234));
+      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+                                getF32Constant(DAG, 0x3f800000));
+      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FEXP2, dl,
+                         getValue(I.getOperand(1)).getValueType(),
+                         getValue(I.getOperand(1)));
+  }
+
+  setValue(&I, result);
+}
+
+/// visitPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode with x == 10.0f.
+void
+SelectionDAGBuilder::visitPow(CallInst &I) {
+  SDValue result;
+  Value *Val = I.getOperand(1);
+  DebugLoc dl = getCurDebugLoc();
+  bool IsExp10 = false;
+
+  if (getValue(Val).getValueType() == MVT::f32 &&
+      getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(Val)) {
+      APFloat Ten(10.0f);
+      IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
+    }
+  }
+
+  if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getOperand(2));
+
+    // Put the exponent in the right bit position for later addition to the
+    // final result:
+    //
+    //   #define LOG2OF10 3.3219281f
+    //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
+    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+                             getF32Constant(DAG, 0x40549a78));
+    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+    //   FractionalPartOfX = (x * LOG2OF10) - (float)IntegerPartOfX;
+    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+    //   IntegerPartOfX <<= 23;
+    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+                                 DAG.getConstant(23, TLI.getPointerTy()));
+
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   twoToFractionalPartOfX =
+      //     0.997535578f +
+      //       (0.735607626f + 0.252464424f * x) * x;
+      //
+      // error 0.0144103317, which is 6 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3e814304));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f3c50c8));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f7f5e7e));
+      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999892986f +
+      //       (0.696457318f +
+      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+      //
+      // error 0.000107046256, which is 13 to 14 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3da235e3));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3e65b8f3));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f324b07));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3f7ff8fd));
+      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999999982f +
+      //       (0.693148872f +
+      //         (0.240227044f +
+      //           (0.554906021e-1f +
+      //             (0.961591928e-2f +
+      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+      // error 2.47208000*10^(-7), which is better than 18 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3924b03e));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3ab24b87));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3c1d8c17));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3d634a1d));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x3e75fe14));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+                                getF32Constant(DAG, 0x3f317234));
+      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+                                getF32Constant(DAG, 0x3f800000));
+      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FPOW, dl,
+                         getValue(I.getOperand(1)).getValueType(),
+                         getValue(I.getOperand(1)),
+                         getValue(I.getOperand(2)));
+  }
+
+  setValue(&I, result);
+}
+
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+                          SelectionDAG &DAG) {
+  // If RHS is a constant, we can expand this out to a multiplication tree,
+  // otherwise we end up lowering to a call to __powidf2 (for example).  When
+  // optimizing for size, we only want to do this if the expansion would produce
+  // a small number of multiplies, otherwise we do the full expansion.
+  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+    // Get the exponent as a positive value.
+    unsigned Val = RHSC->getSExtValue();
+    if ((int)Val < 0) Val = -Val;
+
+    // powi(x, 0) -> 1.0
+    if (Val == 0)
+      return DAG.getConstantFP(1.0, LHS.getValueType());
+
+    Function *F = DAG.getMachineFunction().getFunction();
+    if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+        // If optimizing for size, don't insert too many multiplies.  This
+        // inserts up to 5 multiplies.
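+        // (Roughly CountPopulation_32(Val)-1 merge multiplies plus
+        // Log2_32(Val) squarings.)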
+        CountPopulation_32(Val)+Log2_32(Val) < 7) {
+      // We use the simple binary decomposition method to generate the multiply
+      // sequence.  There are more optimal ways to do this (for example,
+      // powi(x,15) generates one more multiply than it should), but this has
+      // the benefit of being both really simple and much better than a libcall.
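+      // For example, powi(x, 13): 13 = 0b1101, so the loop merges x, x^4,
+      // and x^8 out of the running square to form x^13.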
+      SDValue Res;  // Logically starts equal to 1.0
+      SDValue CurSquare = LHS;
+      while (Val) {
+        if (Val & 1) {
+          if (Res.getNode())
+            Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(),
+                              Res, CurSquare);
+          else
+            Res = CurSquare;  // 1.0*CurSquare.
+        }
+
+        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+                                CurSquare, CurSquare);
+        Val >>= 1;
+      }
+
+      // If the original was negative, invert the result, producing 1/(x*x*x).
+      if (RHSC->getSExtValue() < 0)
+        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+                          DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+      return Res;
+    }
+  }
+
+  // Otherwise, expand to a libcall.
+  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
+
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function.
+/// If we want to emit this as a call to a named external function, return the
+/// name; otherwise lower it and return null.
+const char *
+SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+  DebugLoc dl = getCurDebugLoc();
+  SDValue Res;
+
+  switch (Intrinsic) {
+  default:
+    // By default, turn this into a target intrinsic node.
+    visitTargetIntrinsic(I, Intrinsic);
+    return 0;
+  case Intrinsic::vastart:  visitVAStart(I); return 0;
+  case Intrinsic::vaend:    visitVAEnd(I); return 0;
+  case Intrinsic::vacopy:   visitVACopy(I); return 0;
+  case Intrinsic::returnaddress:
+    setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::frameaddress:
+    setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+                             getValue(I.getOperand(1))));
+    return 0;
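+  // Note: "_setjmp" + 1 == "setjmp", so adding !usesUnderscoreSetJmp() (0 or
+  // 1) to the string literal skips the leading underscore on targets that
+  // don't use it; likewise for longjmp below.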
+  case Intrinsic::setjmp:
+    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+  case Intrinsic::longjmp:
+    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+  case Intrinsic::memcpy: {
+    SDValue Op1 = getValue(I.getOperand(1));
+    SDValue Op2 = getValue(I.getOperand(2));
+    SDValue Op3 = getValue(I.getOperand(3));
+    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+                              I.getOperand(1), 0, I.getOperand(2), 0));
+    return 0;
+  }
+  case Intrinsic::memset: {
+    SDValue Op1 = getValue(I.getOperand(1));
+    SDValue Op2 = getValue(I.getOperand(2));
+    SDValue Op3 = getValue(I.getOperand(3));
+    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
+                              I.getOperand(1), 0));
+    return 0;
+  }
+  case Intrinsic::memmove: {
+    SDValue Op1 = getValue(I.getOperand(1));
+    SDValue Op2 = getValue(I.getOperand(2));
+    SDValue Op3 = getValue(I.getOperand(3));
+    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+
+    // If the source and destination are known to not be aliases, we can
+    // lower memmove as memcpy.
+    uint64_t Size = -1ULL;
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
+      Size = C->getZExtValue();
+    if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
+        AliasAnalysis::NoAlias) {
+      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+                                I.getOperand(1), 0, I.getOperand(2), 0));
+      return 0;
+    }
+
+    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
+                               I.getOperand(1), 0, I.getOperand(2), 0));
+    return 0;
+  }
+  case Intrinsic::dbg_declare: {
+    // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None.
+    // The real handling of this intrinsic is in FastISel.
+    if (OptLevel != CodeGenOpt::None)
+      // FIXME: Variable debug info is not supported here.
+      return 0;
+    DwarfWriter *DW = DAG.getDwarfWriter();
+    if (!DW)
+      return 0;
+    DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+    if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+      return 0;
+
+    MDNode *Variable = DI.getVariable();
+    Value *Address = DI.getAddress();
+    if (!Address)
+      return 0;
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+      Address = BCI->getOperand(0);
+    AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+    // Don't handle byval struct arguments or VLAs, for example.
+    if (!AI)
+      return 0;
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+    if (SI == FuncInfo.StaticAllocaMap.end())
+      return 0; // VLAs.
+    int FI = SI->second;
+
+    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo())
+      if (MDNode *Dbg = DI.getMetadata("dbg"))
+        MMI->setVariableDbgInfo(Variable, FI, Dbg);
+    return 0;
+  }
+  case Intrinsic::dbg_value: {
+    // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None.
+    // The real handling of this intrinsic is in FastISel.
+    if (OptLevel != CodeGenOpt::None)
+      // FIXME: Variable debug info is not supported here.
+      return 0;
+    DwarfWriter *DW = DAG.getDwarfWriter();
+    if (!DW)
+      return 0;
+    DbgValueInst &DI = cast<DbgValueInst>(I);
+    if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+      return 0;
+
+    MDNode *Variable = DI.getVariable();
+    Value *V = DI.getValue();
+    if (!V)
+      return 0;
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+      V = BCI->getOperand(0);
+    AllocaInst *AI = dyn_cast<AllocaInst>(V);
+    // Don't handle byval struct arguments or VLAs, for example.
+    if (!AI)
+      return 0;
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+    if (SI == FuncInfo.StaticAllocaMap.end())
+      return 0; // VLAs.
+    int FI = SI->second;
+    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo())
+      if (MDNode *Dbg = DI.getMetadata("dbg"))
+        MMI->setVariableDbgInfo(Variable, FI, Dbg);
+    return 0;
+  }
+  case Intrinsic::eh_exception: {
+    // Insert the EXCEPTIONADDR instruction.
+    assert(CurMBB->isLandingPad() &&
+           "Call to eh.exception not in landing pad!");
+    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+    SDValue Ops[1];
+    Ops[0] = DAG.getRoot();
+    SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+    setValue(&I, Op);
+    DAG.setRoot(Op.getValue(1));
+    return 0;
+  }
+
+  case Intrinsic::eh_selector: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+    if (CurMBB->isLandingPad())
+      AddCatchInfo(I, MMI, CurMBB);
+    else {
+#ifndef NDEBUG
+      FuncInfo.CatchInfoLost.insert(&I);
+#endif
+      // FIXME: Mark exception selector register as live in.  Hack for PR1508.
+      unsigned Reg = TLI.getExceptionSelectorRegister();
+      if (Reg) CurMBB->addLiveIn(Reg);
+    }
+
+    // Insert the EHSELECTION instruction.
+    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+    SDValue Ops[2];
+    Ops[0] = getValue(I.getOperand(1));
+    Ops[1] = getRoot();
+    SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+    DAG.setRoot(Op.getValue(1));
+    setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
+    return 0;
+  }
+
+  case Intrinsic::eh_typeid_for: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+    if (MMI) {
+      // Find the type id for the given typeinfo.
+      GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+      unsigned TypeID = MMI->getTypeIDFor(GV);
+      Res = DAG.getConstant(TypeID, MVT::i32);
+    } else {
+      // Return something different from eh_selector.
+      Res = DAG.getConstant(1, MVT::i32);
+    }
+
+    setValue(&I, Res);
+    return 0;
+  }
+
+  case Intrinsic::eh_return_i32:
+  case Intrinsic::eh_return_i64:
+    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+      MMI->setCallsEHReturn(true);
+      DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+                              MVT::Other,
+                              getControlRoot(),
+                              getValue(I.getOperand(1)),
+                              getValue(I.getOperand(2))));
+    } else {
+      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+    }
+
+    return 0;
+  case Intrinsic::eh_unwind_init:
+    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+      MMI->setCallsUnwindInit(true);
+    }
+    return 0;
+  case Intrinsic::eh_dwarf_cfa: {
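+    // The CFA is computed as the current frame address plus the target's
+    // frame-to-arguments offset plus the intrinsic's byte offset operand.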
+    EVT VT = getValue(I.getOperand(1)).getValueType();
+    SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl,
+                                        TLI.getPointerTy());
+    SDValue Offset = DAG.getNode(ISD::ADD, dl,
+                                 TLI.getPointerTy(),
+                                 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+                                             TLI.getPointerTy()),
+                                 CfaArg);
+    SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
+                             TLI.getPointerTy(),
+                             DAG.getConstant(0, TLI.getPointerTy()));
+    setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+                             FA, Offset));
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_callsite: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+    assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+    assert(MMI->getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+    MMI->setCurrentCallSite(CI->getZExtValue());
+    return 0;
+  }
+
+  case Intrinsic::convertff:
+  case Intrinsic::convertfsi:
+  case Intrinsic::convertfui:
+  case Intrinsic::convertsif:
+  case Intrinsic::convertuif:
+  case Intrinsic::convertss:
+  case Intrinsic::convertsu:
+  case Intrinsic::convertus:
+  case Intrinsic::convertuu: {
+    ISD::CvtCode Code = ISD::CVT_INVALID;
+    switch (Intrinsic) {
+    case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
+    case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+    case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+    case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+    case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+    case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
+    case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
+    case Intrinsic::convertus:  Code = ISD::CVT_US; break;
+    case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
+    }
+    EVT DestVT = TLI.getValueType(I.getType());
+    Value *Op1 = I.getOperand(1);
+    Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+                               DAG.getValueType(DestVT),
+                               DAG.getValueType(getValue(Op1).getValueType()),
+                               getValue(I.getOperand(2)),
+                               getValue(I.getOperand(3)),
+                               Code);
+    setValue(&I, Res);
+    return 0;
+  }
+  case Intrinsic::sqrt:
+    setValue(&I, DAG.getNode(ISD::FSQRT, dl,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::powi:
+    setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)),
+                            getValue(I.getOperand(2)), DAG));
+    return 0;
+  case Intrinsic::sin:
+    setValue(&I, DAG.getNode(ISD::FSIN, dl,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::cos:
+    setValue(&I, DAG.getNode(ISD::FCOS, dl,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::log:
+    visitLog(I);
+    return 0;
+  case Intrinsic::log2:
+    visitLog2(I);
+    return 0;
+  case Intrinsic::log10:
+    visitLog10(I);
+    return 0;
+  case Intrinsic::exp:
+    visitExp(I);
+    return 0;
+  case Intrinsic::exp2:
+    visitExp2(I);
+    return 0;
+  case Intrinsic::pow:
+    visitPow(I);
+    return 0;
+  case Intrinsic::pcmarker: {
+    SDValue Tmp = getValue(I.getOperand(1));
+    DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+    return 0;
+  }
+  case Intrinsic::readcyclecounter: {
+    SDValue Op = getRoot();
+    Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+                      DAG.getVTList(MVT::i64, MVT::Other),
+                      &Op, 1);
+    setValue(&I, Res);
+    DAG.setRoot(Res.getValue(1));
+    return 0;
+  }
+  case Intrinsic::bswap:
+    setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::cttz: {
+    SDValue Arg = getValue(I.getOperand(1));
+    EVT Ty = Arg.getValueType();
+    setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+    return 0;
+  }
+  case Intrinsic::ctlz: {
+    SDValue Arg = getValue(I.getOperand(1));
+    EVT Ty = Arg.getValueType();
+    setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+    return 0;
+  }
+  case Intrinsic::ctpop: {
+    SDValue Arg = getValue(I.getOperand(1));
+    EVT Ty = Arg.getValueType();
+    setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
+    return 0;
+  }
+  case Intrinsic::stacksave: {
+    SDValue Op = getRoot();
+    Res = DAG.getNode(ISD::STACKSAVE, dl,
+                      DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+    setValue(&I, Res);
+    DAG.setRoot(Res.getValue(1));
+    return 0;
+  }
+  case Intrinsic::stackrestore: {
+    Res = getValue(I.getOperand(1));
+    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
+    return 0;
+  }
+  case Intrinsic::stackprotector: {
+    // Emit code into the DAG to store the stack guard onto the stack.
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineFrameInfo *MFI = MF.getFrameInfo();
+    EVT PtrTy = TLI.getPointerTy();
+
+    SDValue Src = getValue(I.getOperand(1));   // The guard's value.
+    AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+
+    int FI = FuncInfo.StaticAllocaMap[Slot];
+    MFI->setStackProtectorIndex(FI);
+
+    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+    // Store the stack protector onto the stack.
+    Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+                       PseudoSourceValue::getFixedStack(FI),
+                       0, true);
+    setValue(&I, Res);
+    DAG.setRoot(Res);
+    return 0;
+  }
+  case Intrinsic::objectsize: {
+    // If we don't know by now, we're never going to know.
+    ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+
+    assert(CI && "Non-constant type in __builtin_object_size?");
+
+    SDValue Arg = getValue(I.getOperand(0));
+    EVT Ty = Arg.getValueType();
+
+    if (CI->getZExtValue() == 0)
+      Res = DAG.getConstant(-1ULL, Ty);
+    else
+      Res = DAG.getConstant(0, Ty);
+
+    setValue(&I, Res);
+    return 0;
+  }
+  case Intrinsic::var_annotation:
+    // Discard annotate attributes
+    return 0;
+
+  case Intrinsic::init_trampoline: {
+    const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
+
+    SDValue Ops[6];
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getOperand(1));
+    Ops[2] = getValue(I.getOperand(2));
+    Ops[3] = getValue(I.getOperand(3));
+    Ops[4] = DAG.getSrcValue(I.getOperand(1));
+    Ops[5] = DAG.getSrcValue(F);
+
+    Res = DAG.getNode(ISD::TRAMPOLINE, dl,
+                      DAG.getVTList(TLI.getPointerTy(), MVT::Other),
+                      Ops, 6);
+
+    setValue(&I, Res);
+    DAG.setRoot(Res.getValue(1));
+    return 0;
+  }
+  case Intrinsic::gcroot:
+    if (GFI) {
+      Value *Alloca = I.getOperand(1);
+      Constant *TypeMap = cast<Constant>(I.getOperand(2));
+
+      FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+      GFI->addStackRoot(FI->getIndex(), TypeMap);
+    }
+    return 0;
+  case Intrinsic::gcread:
+  case Intrinsic::gcwrite:
+    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+    return 0;
+  case Intrinsic::flt_rounds:
+    setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+    return 0;
+  case Intrinsic::trap:
+    DAG.setRoot(DAG.getNode(ISD::TRAP, dl, MVT::Other, getRoot()));
+    return 0;
+  case Intrinsic::uadd_with_overflow:
+    return implVisitAluOverflow(I, ISD::UADDO);
+  case Intrinsic::sadd_with_overflow:
+    return implVisitAluOverflow(I, ISD::SADDO);
+  case Intrinsic::usub_with_overflow:
+    return implVisitAluOverflow(I, ISD::USUBO);
+  case Intrinsic::ssub_with_overflow:
+    return implVisitAluOverflow(I, ISD::SSUBO);
+  case Intrinsic::umul_with_overflow:
+    return implVisitAluOverflow(I, ISD::UMULO);
+  case Intrinsic::smul_with_overflow:
+    return implVisitAluOverflow(I, ISD::SMULO);
+
+  case Intrinsic::prefetch: {
+    SDValue Ops[4];
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getOperand(1));
+    Ops[2] = getValue(I.getOperand(2));
+    Ops[3] = getValue(I.getOperand(3));
+    DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+    return 0;
+  }
+
+  case Intrinsic::memory_barrier: {
+    SDValue Ops[6];
+    Ops[0] = getRoot();
+    for (int x = 1; x < 6; ++x)
+      Ops[x] = getValue(I.getOperand(x));
+
+    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
+    return 0;
+  }
+  case Intrinsic::atomic_cmp_swap: {
+    SDValue Root = getRoot();
+    SDValue L =
+      DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+                    getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+                    Root,
+                    getValue(I.getOperand(1)),
+                    getValue(I.getOperand(2)),
+                    getValue(I.getOperand(3)),
+                    I.getOperand(1));
+    setValue(&I, L);
+    DAG.setRoot(L.getValue(1));
+    return 0;
+  }
+  case Intrinsic::atomic_load_add:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
+  case Intrinsic::atomic_load_sub:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
+  case Intrinsic::atomic_load_or:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
+  case Intrinsic::atomic_load_xor:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
+  case Intrinsic::atomic_load_and:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
+  case Intrinsic::atomic_load_nand:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
+  case Intrinsic::atomic_load_max:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
+  case Intrinsic::atomic_load_min:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
+  case Intrinsic::atomic_load_umin:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
+  case Intrinsic::atomic_load_umax:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
+  case Intrinsic::atomic_swap:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+
+  case Intrinsic::invariant_start:
+  case Intrinsic::lifetime_start:
+    // Discard region information.
+    setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+    return 0;
+  case Intrinsic::invariant_end:
+  case Intrinsic::lifetime_end:
+    // Discard region information.
+    return 0;
+  }
+}
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
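+///
+/// For example, in:
+///   %r = call i32 @f()
+///   ret i32 %r
+/// the call is in tail position; an interposing instruction with side
+/// effects between the call and the return would disqualify it.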
+static bool
+isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr,
+                     const TargetLowering &TLI) {
+  const Instruction *I = CS.getInstruction();
+  const BasicBlock *ExitBB = I->getParent();
+  const TerminatorInst *Term = ExitBB->getTerminator();
+  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+  const Function *F = ExitBB->getParent();
+
+  // The block must end in a return statement or unreachable.
+  //
+  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+  // an unreachable, for now. The way tailcall optimization is currently
+  // implemented means it will add an epilogue followed by a jump. That is
+  // not profitable. Also, if the callee is a special function (e.g.
+  // longjmp on x86), it can end up causing miscompilation that has not
+  // been fully understood.
+  if (!Ret &&
+      (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+
+  // If I will have a chain, make sure no other instruction that will have a
+  // chain interposes between I and the return.
+  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+      !I->isSafeToSpeculativelyExecute())
+    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+         --BBI) {
+      if (&*BBI == I)
+        break;
+      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+          !BBI->isSafeToSpeculativelyExecute())
+        return false;
+    }
+
+  // If the block ends with a void return or unreachable, it doesn't matter
+  // what the call's return type is.
+  if (!Ret || Ret->getNumOperands() == 0) return true;
+
+  // If the return value is undef, it doesn't matter what the call's
+  // return type is.
+  if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore noalias because it doesn't affect the call sequence.
+  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+    return false;
+
+  // Otherwise, make sure the unmodified return value of I is the return value.
+  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+       U = dyn_cast<Instruction>(U->getOperand(0))) {
+    if (!U)
+      return false;
+    if (!U->hasOneUse())
+      return false;
+    if (U == I)
+      break;
+    // Check for a truly no-op truncate.
+    if (isa<TruncInst>(U) &&
+        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+      continue;
+    // Check for a truly no-op bitcast.
+    if (isa<BitCastInst>(U) &&
+        (U->getOperand(0)->getType() == U->getType() ||
+         (isa<PointerType>(U->getOperand(0)->getType()) &&
+          isa<PointerType>(U->getType()))))
+      continue;
+    // Otherwise it's not a true no-op.
+    return false;
+  }
+
+  return true;
+}
+
+void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
+                                      bool isTailCall,
+                                      MachineBasicBlock *LandingPad) {
+  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  const Type *RetTy = FTy->getReturnType();
+  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+  unsigned BeginLabel = 0, EndLabel = 0;
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Args.reserve(CS.arg_size());
+
+  // Check whether the function can return without sret-demotion.
+  SmallVector<EVT, 4> OutVTs;
+  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+  SmallVector<uint64_t, 4> Offsets;
+  getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+                OutVTs, OutsFlags, TLI, &Offsets);
+
+  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+                        FTy->isVarArg(), OutVTs, OutsFlags, DAG);
+
+  SDValue DemoteStackSlot;
+
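+  // If the target cannot return this value directly in registers, demote the
+  // return: pass a hidden sret pointer to a stack temporary as the first
+  // argument and reload the result from it after the call (see below).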
+  if (!CanLowerReturn) {
+    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+                      FTy->getReturnType());
+    unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
+                      FTy->getReturnType());
+    MachineFunction &MF = DAG.getMachineFunction();
+    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+    const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+    DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+    Entry.Node = DemoteStackSlot;
+    Entry.Ty = StackSlotPtrType;
+    Entry.isSExt = false;
+    Entry.isZExt = false;
+    Entry.isInReg = false;
+    Entry.isSRet = true;
+    Entry.isNest = false;
+    Entry.isByVal = false;
+    Entry.Alignment = Align;
+    Args.push_back(Entry);
+    RetTy = Type::getVoidTy(FTy->getContext());
+  }
+
+  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+       i != e; ++i) {
+    SDValue ArgNode = getValue(*i);
+    Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+
+    unsigned attrInd = i - CS.arg_begin() + 1;
+    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
+    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
+    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
+    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
+    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+    Entry.Alignment = CS.getParamAlignment(attrInd);
+    Args.push_back(Entry);
+  }
+
+  if (LandingPad && MMI) {
+    // Insert a label before the invoke call to mark the try range.  This can be
+    // used to detect deletion of the invoke via the MachineModuleInfo.
+    BeginLabel = MMI->NextLabelID();
+
+    // For SjLj, keep track of which landing pads go with which invokes
+    // so as to maintain the ordering of pads in the LSDA.
+    unsigned CallSiteIndex = MMI->getCurrentCallSite();
+    if (CallSiteIndex) {
+      MMI->setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+      // Now that the call site is handled, stop tracking it.
+      MMI->setCurrentCallSite(0);
+    }
+
+    // Both PendingLoads and PendingExports must be flushed here;
+    // this call might not return.
+    (void)getRoot();
+    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+                             getControlRoot(), BeginLabel));
+  }
+
+  // Check if target-independent constraints permit a tail call here.
+  // Target-dependent constraints are checked within TLI.LowerCallTo.
+  if (isTailCall &&
+      !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
+    isTailCall = false;
+
+  std::pair<SDValue,SDValue> Result =
+    TLI.LowerCallTo(getRoot(), RetTy,
+                    CS.paramHasAttr(0, Attribute::SExt),
+                    CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
+                    CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
+                    CS.getCallingConv(),
+                    isTailCall,
+                    !CS.getInstruction()->use_empty(),
+                    Callee, Args, DAG, getCurDebugLoc(), SDNodeOrder);
+  assert((isTailCall || Result.second.getNode()) &&
+         "Non-null chain expected with non-tail call!");
+  assert((Result.second.getNode() || !Result.first.getNode()) &&
+         "Null value expected with tail call!");
+  if (Result.first.getNode()) {
+    setValue(CS.getInstruction(), Result.first);
+  } else if (!CanLowerReturn && Result.second.getNode()) {
+    // The instruction result is the result of loading from the
+    // hidden sret parameter.
+    SmallVector<EVT, 1> PVTs;
+    const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+    ComputeValueVTs(TLI, PtrRetTy, PVTs);
+    assert(PVTs.size() == 1 && "Pointers should fit in one register");
+    EVT PtrVT = PVTs[0];
+    unsigned NumValues = OutVTs.size();
+    SmallVector<SDValue, 4> Values(NumValues);
+    SmallVector<SDValue, 4> Chains(NumValues);
+
+    for (unsigned i = 0; i < NumValues; ++i) {
+      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
+                                DemoteStackSlot,
+                                DAG.getConstant(Offsets[i], PtrVT));
+      SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
+                              Add, NULL, Offsets[i], false, 1);
+      Values[i] = L;
+      Chains[i] = L.getValue(1);
+    }
+
+    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                MVT::Other, &Chains[0], NumValues);
+    PendingLoads.push_back(Chain);
+    
+    // Collect the legal value parts into potentially illegal values
+    // that correspond to the original function's return values.
+    SmallVector<EVT, 4> RetTys;
+    RetTy = FTy->getReturnType();
+    ComputeValueVTs(TLI, RetTy, RetTys);
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+    SmallVector<SDValue, 4> ReturnValues;
+    unsigned CurReg = 0;
+    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      EVT VT = RetTys[I];
+      EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
+      unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
+  
+      SDValue ReturnValue =
+        getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder,
+                         &Values[CurReg], NumRegs, RegisterVT, VT, AssertOp);
+      ReturnValues.push_back(ReturnValue);
+      CurReg += NumRegs;
+    }
+
+    setValue(CS.getInstruction(),
+             DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                         DAG.getVTList(&RetTys[0], RetTys.size()),
+                         &ReturnValues[0], ReturnValues.size()));
+
+  }
+
+  // As a special case, a null chain means that a tail call has been emitted and
+  // the DAG root is already updated.
+  if (Result.second.getNode())
+    DAG.setRoot(Result.second);
+  else
+    HasTailCall = true;
+
+  if (LandingPad && MMI) {
+    // Insert a label at the end of the invoke call to mark the try range.  This
+    // can be used to detect deletion of the invoke via the MachineModuleInfo.
+    EndLabel = MMI->NextLabelID();
+    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+                             getRoot(), EndLabel));
+
+    // Inform MachineModuleInfo of range.
+    MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
+  }
+}
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
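+/// For example, "icmp eq i32 %v, 0" and "icmp ne i32 %v, 0" qualify; any
+/// other use (arithmetic, an ordering compare, a call) does not.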
+static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+       UI != E; ++UI) {
+    if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+      if (IC->isEquality())
+        if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+          if (C->isNullValue())
+            continue;
+    // Unknown instruction.
+    return false;
+  }
+  return true;
+}
+
+static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
+                             SelectionDAGBuilder &Builder) {
+
+  // Check to see if this load can be trivially constant folded, e.g. if the
+  // input is from a string literal.
+  if (Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+    // Cast pointer to the type we really want to load.
+    LoadInput = ConstantExpr::getBitCast(LoadInput,
+                                         PointerType::getUnqual(LoadTy));
+
+    if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD))
+      return Builder.getValue(LoadCst);
+  }
+
+  // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
+  // still constant memory, the input chain can be the entry node.
+  SDValue Root;
+  bool ConstantMemory = false;
+
+  // Do not serialize (non-volatile) loads of constant memory with anything.
+  if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+    Root = Builder.DAG.getEntryNode();
+    ConstantMemory = true;
+  } else {
+    // Do not serialize non-volatile loads against each other.
+    Root = Builder.DAG.getRoot();
+  }
+
+  SDValue Ptr = Builder.getValue(PtrVal);
+  SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
+                                        Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
+                                        false /*volatile*/, 1 /* align=1 */);
+
+  if (!ConstantMemory)
+    Builder.PendingLoads.push_back(LoadVal.getValue(1));
+  return LoadVal;
+}
+
+
+/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
+/// If so, return true and lower it, otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
+  // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
+  if (I.getNumOperands() != 4)
+    return false;
+
+  Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
+  if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) ||
+      !isa<IntegerType>(I.getOperand(3)->getType()) ||
+      !isa<IntegerType>(I.getType()))
+    return false;
+
+  ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
+
+  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
+  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
+  if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+    bool ActuallyDoIt = true;
+    MVT LoadVT;
+    const Type *LoadTy;
+    switch (Size->getZExtValue()) {
+    default:
+      LoadVT = MVT::Other;
+      LoadTy = 0;
+      ActuallyDoIt = false;
+      break;
+    case 2:
+      LoadVT = MVT::i16;
+      LoadTy = Type::getInt16Ty(Size->getContext());
+      break;
+    case 4:
+      LoadVT = MVT::i32;
+      LoadTy = Type::getInt32Ty(Size->getContext());
+      break;
+    case 8:
+      LoadVT = MVT::i64;
+      LoadTy = Type::getInt64Ty(Size->getContext());
+      break;
+        /*
+    case 16:
+      LoadVT = MVT::v4i32;
+      LoadTy = Type::getInt32Ty(Size->getContext());
+      LoadTy = VectorType::get(LoadTy, 4);
+      break;
+         */
+    }
+
+    // This turns into unaligned loads, so only do it if the target natively
+    // supports unaligned loads of the MVT we'll be loading, or if the access
+    // is small enough (<= 4 bytes) that we'd only produce a few byte loads;
+    // expanding larger accesses into byte loads would bloat the code.
+    if (ActuallyDoIt && Size->getZExtValue() > 4) {
+      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+      if (!TLI.isTypeLegal(LoadVT) ||
+          !TLI.allowsUnalignedMemoryAccesses(LoadVT))
+        ActuallyDoIt = false;
+    }
+
+    if (ActuallyDoIt) {
+      SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
+      SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+
+      SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
+                                 ISD::SETNE);
+      EVT CallVT = TLI.getValueType(I.getType(), true);
+      setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
+      return true;
+    }
+  }
+
+
+  return false;
+}
+
+
+void SelectionDAGBuilder::visitCall(CallInst &I) {
+  const char *RenameFn = 0;
+  if (Function *F = I.getCalledFunction()) {
+    if (F->isDeclaration()) {
+      const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
+      if (II) {
+        if (unsigned IID = II->getIntrinsicID(F)) {
+          RenameFn = visitIntrinsicCall(I, IID);
+          if (!RenameFn)
+            return;
+        }
+      }
+      if (unsigned IID = F->getIntrinsicID()) {
+        RenameFn = visitIntrinsicCall(I, IID);
+        if (!RenameFn)
+          return;
+      }
+    }
+
+    // Check for well-known libc/libm calls.  If the function is internal, it
+    // can't be a library call.
+    if (!F->hasLocalLinkage() && F->hasName()) {
+      StringRef Name = F->getName();
+      if (Name == "copysign" || Name == "copysignf") {
+        if (I.getNumOperands() == 3 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType() &&
+            I.getType() == I.getOperand(2)->getType()) {
+          SDValue LHS = getValue(I.getOperand(1));
+          SDValue RHS = getValue(I.getOperand(2));
+          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+                                   LHS.getValueType(), LHS, RHS));
+          return;
+        }
+      } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "memcmp") {
+        if (visitMemCmpCall(I))
+          return;
+      }
+    }
+  } else if (isa<InlineAsm>(I.getOperand(0))) {
+    visitInlineAsm(&I);
+    return;
+  }
+
+  SDValue Callee;
+  if (!RenameFn)
+    Callee = getValue(I.getOperand(0));
+  else
+    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  // Check if we can potentially perform a tail call. More detailed checking
+  // is done within LowerCallTo, after more information about the call is
+  // known.
+  LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy the value
+/// out of the registers specified by this object, returning the result as a
+/// ValueVT value.  This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+                                      unsigned Order, SDValue &Chain,
+                                      SDValue *Flag) const {
+  // Assemble the legal parts into the final values.
+  SmallVector<SDValue, 4> Values(ValueVTs.size());
+  SmallVector<SDValue, 8> Parts;
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    // Copy the legal parts from the registers.
+    EVT ValueVT = ValueVTs[Value];
+    unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
+    EVT RegisterVT = RegVTs[Value];
+
+    Parts.resize(NumRegs);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      SDValue P;
+      if (Flag == 0) {
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+      } else {
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+        *Flag = P.getValue(2);
+      }
+
+      Chain = P.getValue(1);
+
+      // If the source register was virtual and if we know something about it,
+      // add an assert node.
+      if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
+          RegisterVT.isInteger() && !RegisterVT.isVector()) {
+        unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
+        FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+        if (FLI.LiveOutRegInfo.size() > SlotNo) {
+          FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
+
+          unsigned RegSize = RegisterVT.getSizeInBits();
+          unsigned NumSignBits = LOI.NumSignBits;
+          unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+          // FIXME: We capture more information than the dag can represent.  For
+          // now, just use the tightest assertzext/assertsext possible.
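+          // (For example, an i32 vreg whose top 24 bits are known zero gets
+          // an AssertZext from i8.)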
+          bool isSExt = true;
+          EVT FromVT(MVT::Other);
+          if (NumSignBits == RegSize)
+            isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
+          else if (NumZeroBits >= RegSize-1)
+            isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
+          else if (NumSignBits > RegSize-8)
+            isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
+          else if (NumZeroBits >= RegSize-8)
+            isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
+          else if (NumSignBits > RegSize-16)
+            isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
+          else if (NumZeroBits >= RegSize-16)
+            isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+          else if (NumSignBits > RegSize-32)
+            isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
+          else if (NumZeroBits >= RegSize-32)
+            isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+
+          if (FromVT != MVT::Other)
+            P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+                            RegisterVT, P, DAG.getValueType(FromVT));
+        }
+      }
+
+      Parts[i] = P;
+    }
+
+    Values[Value] = getCopyFromParts(DAG, dl, Order, Parts.begin(),
+                                     NumRegs, RegisterVT, ValueVT);
+    Part += NumRegs;
+    Parts.clear();
+  }
+
+  return DAG.getNode(ISD::MERGE_VALUES, dl,
+                     DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+                     &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copy the specified
+/// value into the registers specified by this object.  This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                                 unsigned Order, SDValue &Chain,
+                                 SDValue *Flag) const {
+  // Get the list of the value's legal parts.
+  unsigned NumRegs = Regs.size();
+  SmallVector<SDValue, 8> Parts(NumRegs);
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    EVT ValueVT = ValueVTs[Value];
+    unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
+    EVT RegisterVT = RegVTs[Value];
+
+    getCopyToParts(DAG, dl, Order,
+                   Val.getValue(Val.getResNo() + Value),
+                   &Parts[Part], NumParts, RegisterVT);
+    Part += NumParts;
+  }
+
+  // Copy the parts into the registers.
+  SmallVector<SDValue, 8> Chains(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i) {
+    SDValue Part;
+    if (Flag == 0) {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+    } else {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+      *Flag = Part.getValue(1);
+    }
+
+    Chains[i] = Part.getValue(0);
+  }
+
+  if (NumRegs == 1 || Flag)
+    // If NumRegs > 1 && Flag is used, then the use of the last CopyToReg is
+    // flagged to it.  That is, the CopyToReg nodes and the user are considered
+    // a single scheduling unit. If we create a TokenFactor and return it as
+    // chain, then the TokenFactor is both a predecessor (operand) of the
+    // user as well as a successor (the TF operands are flagged to the user).
+    // c1, f1 = CopyToReg
+    // c2, f2 = CopyToReg
+    // c3     = TokenFactor c1, c2
+    // ...
+    //        = op c3, ..., f2
+    Chain = Chains[NumRegs-1];
+  else
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list.  This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code,
+                                        bool HasMatching, unsigned MatchingIdx,
+                                        SelectionDAG &DAG, unsigned Order,
+                                        std::vector<SDValue> &Ops) const {
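+  // The flag word is laid out as follows: bits 0-2 hold the operand code,
+  // bits 3-15 hold the register count, and, when HasMatching is set, bit 31
+  // plus bits 16 and up record the matching operand index.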
+  assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
+  unsigned Flag = Code | (Regs.size() << 3);
+  if (HasMatching)
+    Flag |= 0x80000000 | (MatchingIdx << 16);
+  SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+  Ops.push_back(Res);
+
+  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+    EVT RegisterVT = RegVTs[Value];
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      assert(Reg < Regs.size() && "Mismatch in # registers expected");
+      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+    }
+  }
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register.  Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+                      const TargetLowering &TLI,
+                      const TargetRegisterInfo *TRI) {
+  EVT FoundVT = MVT::Other;
+  const TargetRegisterClass *FoundRC = 0;
+  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+       E = TRI->regclass_end(); RCI != E; ++RCI) {
+    EVT ThisVT = MVT::Other;
+
+    const TargetRegisterClass *RC = *RCI;
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (TLI.isTypeLegal(*I)) {
+        // If we have already found this register in a different register class,
+        // choose the one with the largest VT specified.  For example, on
+        // PowerPC, we favor f64 register classes over f32.
+        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+          ThisVT = *I;
+          break;
+        }
+      }
+    }
+
+    if (ThisVT == MVT::Other) continue;
+
+    // NOTE: This isn't ideal.  In particular, this might allocate the
+    // frame pointer in functions that need it (due to them not being taken
+    // out of allocation, because a variable sized allocation hasn't been seen
+    // yet).  This is a slight code pessimization, but should still work.
+    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+         E = RC->allocation_order_end(MF); I != E; ++I)
+      if (*I == Reg) {
+        // We found a matching register class.  Keep looking at others in case
+        // we find one with larger registers that this physreg is also in.
+        FoundRC = RC;
+        FoundVT = ThisVT;
+        break;
+      }
+  }
+  return FoundRC;
+}
+
+
+namespace llvm {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+    public TargetLowering::AsmOperandInfo {
+public:
+  /// CallOperand - If this is the result output operand or a clobber
+  /// this is null, otherwise it is the incoming operand to the CallInst.
+  /// This gets modified as the asm is processed.
+  SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+
+  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+  }
+
+  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+  /// busy in OutputRegs/InputRegs.
+  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+                         std::set<unsigned> &OutputRegs,
+                         std::set<unsigned> &InputRegs,
+                         const TargetRegisterInfo &TRI) const {
+    if (isOutReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+    }
+    if (isInReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+    }
+  }
+
+  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+  /// corresponds to.  If there is no Value* for this operand, it returns
+  /// MVT::Other.
+  EVT getCallOperandValEVT(LLVMContext &Context,
+                           const TargetLowering &TLI,
+                           const TargetData *TD) const {
+    if (CallOperandVal == 0) return MVT::Other;
+
+    if (isa<BasicBlock>(CallOperandVal))
+      return TLI.getPointerTy();
+
+    const llvm::Type *OpTy = CallOperandVal->getType();
+
+    // If this is an indirect operand, the operand is a pointer to the
+    // accessed type.
+    if (isIndirect) {
+      const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+      if (!PtrTy)
+        llvm_report_error("Indirect operand for inline asm not a pointer!");
+      OpTy = PtrTy->getElementType();
+    }
+
+    // If OpTy is not a single value, it may be a struct/union that we
+    // can tile with integers.
+    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+      switch (BitSize) {
+      default: break;
+      case 1:
+      case 8:
+      case 16:
+      case 32:
+      case 64:
+      case 128:
+        OpTy = IntegerType::get(Context, BitSize);
+        break;
+      }
+    }
+
+    return TLI.getValueType(OpTy, true);
+  }
+
+private:
+  /// MarkRegAndAliases - Mark the specified register and all aliases in the
+  /// specified set.
+  static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+                                const TargetRegisterInfo &TRI) {
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+    Regs.insert(Reg);
+    if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+      for (; *Aliases; ++Aliases)
+        Regs.insert(*Aliases);
+  }
+};
+} // end llvm namespace.
+
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand.  We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process.  However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation.  This produces generally horrible, but correct, code.
+///
+///   OpInfo describes the operand.
+///   Input and OutputRegs are the set of already allocated physical registers.
+///
+void SelectionDAGBuilder::
+GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                     std::set<unsigned> &OutputRegs,
+                     std::set<unsigned> &InputRegs) {
+  LLVMContext &Context = FuncInfo.Fn->getContext();
+
+  // Compute whether this value requires an input register, an output register,
+  // or both.
+  bool isOutReg = false;
+  bool isInReg = false;
+  switch (OpInfo.Type) {
+  case InlineAsm::isOutput:
+    isOutReg = true;
+
+    // If there is an input constraint that matches this, we need to reserve
+    // the input register so no other inputs allocate to it.
+    isInReg = OpInfo.hasMatchingInput();
+    break;
+  case InlineAsm::isInput:
+    isInReg = true;
+    isOutReg = false;
+    break;
+  case InlineAsm::isClobber:
+    isOutReg = true;
+    isInReg = true;
+    break;
+  }
+
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SmallVector<unsigned, 4> Regs;
+
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                     OpInfo.ConstraintVT);
+
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is an FP input in an integer register (or vice versa), insert a
+    // bitcast of the input value.  More generally, handle any case where the
+    // input value disagrees with the register class we plan to stick it in.
+    if (OpInfo.Type == InlineAsm::isInput &&
+        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+      // Try to convert to the first EVT that the reg class contains.  If the
+      // types have the same size, use a bitcast to convert (e.g. two differing
+      // vector types).
+      EVT RegVT = *PhysReg.second->vt_begin();
+      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+        // If the input is a FP value and we want it in FP registers, do a
+        // bitcast to the corresponding integer type.  This turns an f64 value
+        // into i64, which can be passed with two i32 values on a 32-bit
+        // machine.
+        RegVT = EVT::getIntegerVT(Context,
+                                  OpInfo.ConstraintVT.getSizeInBits());
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      }
+    }
+
+    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+  }
+
+  EVT RegVT;
+  EVT ValueVT = OpInfo.ConstraintVT;
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+  if (unsigned AssignedReg = PhysReg.first) {
+    const TargetRegisterClass *RC = PhysReg.second;
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = *RC->vt_begin();
+
+    // Get the actual register value type.  This is important, because the user
+    // may have asked for (e.g.) the AX register in i32 type.  We need to
+    // remember that AX is actually i16 to get the right extension.
+    RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+    Regs.push_back(AssignedReg);
+
+    // If this is an expanded reference, add the rest of the regs to Regs.
+    if (NumRegs != 1) {
+      TargetRegisterClass::iterator I = RC->begin();
+      for (; *I != AssignedReg; ++I)
+        assert(I != RC->end() && "Didn't find reg!");
+
+      // Already added the first reg.
+      --NumRegs; ++I;
+      for (; NumRegs; --NumRegs, ++I) {
+        assert(I != RC->end() && "Ran out of registers to allocate!");
+        Regs.push_back(*I);
+      }
+    }
+
+    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+    return;
+  }
+
+  // Otherwise, if this was a reference to an LLVM register class, create vregs
+  // for this reference.
+  if (const TargetRegisterClass *RC = PhysReg.second) {
+    RegVT = *RC->vt_begin();
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = RegVT;
+
+    // Create the appropriate number of virtual registers.
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    for (; NumRegs; --NumRegs)
+      Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+    return;
+  }
+
+  // This is a reference to a register class that doesn't directly correspond
+  // to an LLVM register class.  Allocate NumRegs consecutive, available,
+  // registers from the class.
+  std::vector<unsigned> RegClassRegs
+    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                            OpInfo.ConstraintVT);
+
+  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+  unsigned NumAllocated = 0;
+  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+    unsigned Reg = RegClassRegs[i];
+    // See if this register is available.
+    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
+        (isInReg  && InputRegs.count(Reg))) {    // Already used.
+      // Make sure we find consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Check to see if this register is allocatable (i.e. don't give out the
+    // stack pointer).
+    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
+    if (!RC) {        // Couldn't allocate this register.
+      // Reset NumAllocated to make sure we return consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Okay, this register is good, we can use it.
+    ++NumAllocated;
+
+    // If we allocated enough consecutive registers, succeed.
+    if (NumAllocated == NumRegs) {
+      unsigned RegStart = (i-NumAllocated)+1;
+      unsigned RegEnd   = i+1;
+      // Collect the allocated registers; they are marked used below.
+      for (unsigned i = RegStart; i != RegEnd; ++i)
+        Regs.push_back(RegClassRegs[i]);
+
+      OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
+                                         OpInfo.ConstraintVT);
+      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+      return;
+    }
+  }
+
+  // Otherwise, we couldn't allocate enough registers for this.
+}
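+// For illustration (hypothetical register names): if the constraint's
+// register list is {R0, R1, R2, R3}, NumRegs is 2, and R1 is already in
+// OutputRegs, the scan above counts R0, resets NumAllocated at R1, and then
+// succeeds with the consecutive pair {R2, R3}.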
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+static bool
+hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+                          const TargetLowering &TLI) {
+  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+    InlineAsm::ConstraintInfo &CI = CInfos[i];
+    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+      if (CType == TargetLowering::C_Memory)
+        return true;
+    }
+
+    // Indirect operands access memory.
+    if (CI.isIndirect)
+      return true;
+  }
+
+  return false;
+}
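+// For example, a constraint string such as "=*m,r" contains an indirect
+// memory output ("=*m"), so either check above fires; a pure register string
+// like "=r,r" makes hasInlineAsmMemConstraint return false.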
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
+  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+  /// ConstraintOperands - Information about all of the constraints.
+  std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+
+  std::set<unsigned> OutputRegs, InputRegs;
+
+  // Do a prepass over the constraints, canonicalizing them, and building up the
+  // ConstraintOperands list.
+  std::vector<InlineAsm::ConstraintInfo>
+    ConstraintInfos = IA->ParseConstraints();
+
+  bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
+
+  SDValue Chain, Flag;
+
+  // We won't need to flush pending loads if this asm doesn't touch
+  // memory and is nonvolatile.
+  if (hasMemory || IA->hasSideEffects())
+    Chain = getRoot();
+  else
+    Chain = DAG.getRoot();
+
+  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
+  unsigned ResNo = 0;   // ResNo - The result number of the next output.
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    EVT OpVT = MVT::Other;
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      // Indirect outputs just consume an argument.
+      if (OpInfo.isIndirect) {
+        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+        break;
+      }
+
+      // The return value of the call is this value.  As such, there is no
+      // corresponding argument.
+      assert(!CS.getType()->isVoidTy() &&
+             "Bad inline asm!");
+      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+        OpVT = TLI.getValueType(STy->getElementType(ResNo));
+      } else {
+        assert(ResNo == 0 && "Asm only has one result!");
+        OpVT = TLI.getValueType(CS.getType());
+      }
+      ++ResNo;
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // If this is an input or an indirect output, process the call argument.
+    // BasicBlocks are labels, currently appearing only in inline asms.
+    if (OpInfo.CallOperandVal) {
+      // Strip bitcasts, if any.  This mostly comes up for functions.
+      OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
+
+      if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+      } else {
+        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+      }
+
+      OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
+    }
+
+    OpInfo.ConstraintVT = OpVT;
+  }
+
+  // Second pass over the constraints: compute which constraint option to use
+  // and assign registers to constraints that want a specific physreg.
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // If this is an output operand with a matching input operand, look up the
+    // matching input. If their types mismatch, e.g. one is an integer, the
+    // other is floating point, or their sizes are different, flag it as an
+    // error.
+    if (OpInfo.hasMatchingInput()) {
+      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+        if ((OpInfo.ConstraintVT.isInteger() !=
+             Input.ConstraintVT.isInteger()) ||
+            (OpInfo.ConstraintVT.getSizeInBits() !=
+             Input.ConstraintVT.getSizeInBits())) {
+          llvm_report_error("Unsupported asm: input constraint"
+                            " with a matching output constraint of incompatible"
+                            " type!");
+        }
+        Input.ConstraintVT = OpInfo.ConstraintVT;
+      }
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
+
+    // If this is a memory input and the operand is not indirect, do whatever
+    // is needed to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert(OpInfo.Type == InlineAsm::isInput &&
+             "Can only indirectify direct input operands!");
+
+      // Memory operands really want the address of the value.  If we don't have
+      // an indirect input, put it in the constpool if we can, otherwise spill
+      // it to a stack slot.
+
+      // If the operand is a float, integer, or vector constant, spill to a
+      // constant pool entry to get its address.
+      Value *OpVal = OpInfo.CallOperandVal;
+      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+          isa<ConstantVector>(OpVal)) {
+        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+                                                 TLI.getPointerTy());
+      } else {
+        // Otherwise, create a stack slot and emit a store to it before the
+        // asm.
+        const Type *Ty = OpVal->getType();
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Chain = DAG.getStore(Chain, getCurDebugLoc(),
+                             OpInfo.CallOperand, StackSlot, NULL, 0);
+        OpInfo.CallOperand = StackSlot;
+      }
+
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = 0;
+
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+
+    // If this constraint is for a specific register, allocate it before
+    // anything else.
+    if (OpInfo.ConstraintType == TargetLowering::C_Register)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+
+  ConstraintInfos.clear();
+
+  // Third pass - Loop over all of the operands, assigning virtual or physregs
+  // to register class operands.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // C_Register operands have already been allocated, Other/Memory don't need
+    // to be.
+    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDValue> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
+  AsmNodeOperands.push_back(
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+                                      TLI.getPointerTy()));
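+  // Note on layout: operand 0 of the INLINEASM node is the chain (patched in
+  // at the end of this function), operand 1 is the asm string, and each
+  // constraint then contributes a target-constant flag word followed by its
+  // operands (registers, a memory address, or immediates); a trailing flag
+  // value is appended last if one is live.  This is why the matched-operand
+  // scan below starts at CurOp = 2.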
+
+
+  // Loop over all of the inputs, copying the operand values into the
+  // appropriate registers and processing the output regs.
+  RegsForValue RetValRegs;
+
+  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput: {
+      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+          OpInfo.ConstraintType != TargetLowering::C_Register) {
+        // Memory output, or 'other' output (e.g. 'X' constraint).
+        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+        // Add information to the INLINEASM node to know about this output.
+        unsigned ResOpType = 4/*MEM*/ | (1<<3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(OpInfo.CallOperand);
+        break;
+      }
+
+      // Otherwise, this is a register or register class output.
+
+      // Copy the output from the appropriate register.  Find a register that
+      // we can use.
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        llvm_report_error("Couldn't allocate output reg for"
+                          " constraint '" + OpInfo.ConstraintCode + "'!");
+      }
+
+      // If this is an indirect operand, store through the pointer after the
+      // asm.
+      if (OpInfo.isIndirect) {
+        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+                                                      OpInfo.CallOperandVal));
+      } else {
+        // This is the result value of the call.
+        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+        // Concatenate this output onto the outputs list.
+        RetValRegs.append(OpInfo.AssignedRegs);
+      }
+
+      // Add information to the INLINEASM node to know that this register is
+      // set.
+      OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+                                               6 /* EARLYCLOBBER REGDEF */ :
+                                               2 /* REGDEF */ ,
+                                               false,
+                                               0,
+                                               DAG, SDNodeOrder,
+                                               AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isInput: {
+      SDValue InOperandVal = OpInfo.CallOperand;
+
+      if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
+        // If this is required to match an output register we have already set,
+        // just use its register.
+        unsigned OperandNo = OpInfo.getMatchedOperand();
+
+        // Scan until we find the definition we already emitted of this operand.
+        // When we find it, create a RegsForValue operand.
+        unsigned CurOp = 2;  // The first operand.
+        for (; OperandNo; --OperandNo) {
+          // Advance to the next operand.
+          unsigned OpFlag =
+            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+          assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
+                  (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
+                  (OpFlag & 7) == 4 /*MEM*/) &&
+                 "Skipped past definitions?");
+          CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+        }
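+        // Worked example: if operand #0 was emitted as a REGDEF with two
+        // registers, its entry is one flag word plus two register operands,
+        // so the scan advances CurOp by getNumOperandRegisters(OpFlag)+1 = 3
+        // to reach the flag word of the next operand.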
+
+        unsigned OpFlag =
+          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+        if ((OpFlag & 7) == 2 /*REGDEF*/
+            || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
+          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+          if (OpInfo.isIndirect) {
+            llvm_report_error("Don't know how to handle tied indirect "
+                              "register inputs yet!");
+          }
+          RegsForValue MatchedRegs;
+          MatchedRegs.TLI = &TLI;
+          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+          EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+          MatchedRegs.RegVTs.push_back(RegVT);
+          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+               i != e; ++i)
+            MatchedRegs.Regs.push_back
+              (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+          // Use the produced MatchedRegs object to copy the operand value
+          // into the newly created virtual registers.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                    SDNodeOrder, Chain, &Flag);
+          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+                                           true, OpInfo.getMatchedOperand(),
+                                           DAG, SDNodeOrder, AsmNodeOperands);
+          break;
+        } else {
+          assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
+          assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
+                 "Unexpected number of operands");
+          // Add information to the INLINEASM node to know about this input.
+          // See InlineAsm.h isUseOperandTiedToDef.
+          OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
+          AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+                                                          TLI.getPointerTy()));
+          AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+          break;
+        }
+      }
+
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+        assert(!OpInfo.isIndirect &&
+               "Don't know how to handle indirect other inputs yet!");
+
+        std::vector<SDValue> Ops;
+        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+                                         hasMemory, Ops, DAG);
+        if (Ops.empty()) {
+          llvm_report_error("Invalid operand for inline asm"
+                            " constraint '" + OpInfo.ConstraintCode + "'!");
+        }
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+        break;
+      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+               "Memory operands expect pointer values");
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 4/*MEM*/ | (1<<3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      }
+
+      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+              OpInfo.ConstraintType == TargetLowering::C_Register) &&
+             "Unknown constraint type!");
+      assert(!OpInfo.isIndirect &&
+             "Don't know how to handle indirect register inputs yet!");
+
+      // Copy the input into the appropriate registers.
+      if (OpInfo.AssignedRegs.Regs.empty() ||
+          !OpInfo.AssignedRegs.areValueTypesLegal()) {
+        llvm_report_error("Couldn't allocate input reg for"
+                          " constraint '"+ OpInfo.ConstraintCode +"'!");
+      }
+
+      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                        SDNodeOrder, Chain, &Flag);
+
+      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
+                                               DAG, SDNodeOrder,
+                                               AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isClobber: {
+      // Add the clobbered value to the operand list, so that the register
+      // allocator is aware that the physreg got clobbered.
+      if (!OpInfo.AssignedRegs.Regs.empty())
+        OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
+                                                 false, 0, DAG, SDNodeOrder,
+                                                 AsmNodeOperands);
+      break;
+    }
+    }
+  }
+
+  // Finish up input operands.
+  AsmNodeOperands[0] = Chain;
+  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+  Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+                      DAG.getVTList(MVT::Other, MVT::Flag),
+                      &AsmNodeOperands[0], AsmNodeOperands.size());
+  Flag = Chain.getValue(1);
+
+  // If this asm returns a register value, copy the result from that register
+  // and set it as the value of the call.
+  if (!RetValRegs.Regs.empty()) {
+    SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+                                             SDNodeOrder, Chain, &Flag);
+
+    // FIXME: Why don't we do this for inline asms with MRVs?
+    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+      EVT ResultType = TLI.getValueType(CS.getType());
+
+      // If any of the results of the inline asm is a vector, it may have the
+      // wrong width/num elts.  This can happen for register classes that can
+      // contain multiple different value types.  The preg or vreg allocated may
+      // not have the same VT as was expected.  Convert it to the right type
+      // with bit_convert.
+      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                          ResultType, Val);
+
+      } else if (ResultType != Val.getValueType() &&
+                 ResultType.isInteger() && Val.getValueType().isInteger()) {
+        // If a result value was tied to an input value, the computed result may
+        // have a wider width than the expected result.  Extract the relevant
+        // portion.
+        Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+      }
+
+      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+    }
+
+    setValue(CS.getInstruction(), Val);
+    // Don't need to use this as a chain in this case.
+    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+      return;
+  }
+
+  std::vector<std::pair<SDValue, Value*> > StoresToEmit;
+
+  // Process indirect outputs, first output all of the flagged copies out of
+  // physregs.
+  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+    Value *Ptr = IndirectStoresToEmit[i].second;
+    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+                                             SDNodeOrder, Chain, &Flag);
+    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+
+  }
+
+  // Emit the non-flagged stores from the physregs.
+  SmallVector<SDValue, 8> OutChains;
+  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
+    SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
+                               StoresToEmit[i].first,
+                               getValue(StoresToEmit[i].second),
+                               StoresToEmit[i].second, 0);
+    OutChains.push_back(Val);
+  }
+
+  if (!OutChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+                        &OutChains[0], OutChains.size());
+
+  DAG.setRoot(Chain);
+}
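+// For illustration, a call such as
+//   %sum = call i32 asm "add $1, $0", "=r,r,0"(i32 %b, i32 %a)
+// presents visitInlineAsm with one isOutput constraint ("=r", the i32
+// result), one plain isInput constraint ("r", consuming %b), and one
+// matching input ("0") that is tied to output operand 0 and is therefore
+// constrained to the same registers as that output.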
+
+void SelectionDAGBuilder::visitVAStart(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+                          MVT::Other, getRoot(),
+                          getValue(I.getOperand(1)),
+                          DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGBuilder::visitVAArg(VAArgInst &I) {
+  SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+                           getRoot(), getValue(I.getOperand(0)),
+                           DAG.getSrcValue(I.getOperand(0)));
+  setValue(&I, V);
+  DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGBuilder::visitVAEnd(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+                          MVT::Other, getRoot(),
+                          getValue(I.getOperand(1)),
+                          DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGBuilder::visitVACopy(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+                          MVT::Other, getRoot(),
+                          getValue(I.getOperand(1)),
+                          getValue(I.getOperand(2)),
+                          DAG.getSrcValue(I.getOperand(1)),
+                          DAG.getSrcValue(I.getOperand(2))));
+}
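+// The four visitors above translate the va_start, va_arg, va_end, and
+// va_copy operations directly into their ISD counterparts.  For example,
+//   call void @llvm.va_start(i8* %ap)
+// becomes an ISD::VASTART node whose operands are the current root chain,
+// the lowered %ap pointer, and a srcvalue node recording the IR operand.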
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+                            bool RetSExt, bool RetZExt, bool isVarArg,
+                            bool isInreg, unsigned NumFixedArgs,
+                            CallingConv::ID CallConv, bool isTailCall,
+                            bool isReturnValueUsed,
+                            SDValue Callee,
+                            ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl,
+                            unsigned Order) {
+  // Handle all of the outgoing arguments.
+  SmallVector<ISD::OutputArg, 32> Outs;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+    for (unsigned Value = 0, NumValues = ValueVTs.size();
+         Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+      SDValue Op = SDValue(Args[i].Node.getNode(),
+                           Args[i].Node.getResNo() + Value);
+      ISD::ArgFlagsTy Flags;
+      unsigned OriginalAlignment =
+        getTargetData()->getABITypeAlignment(ArgTy);
+
+      if (Args[i].isZExt)
+        Flags.setZExt();
+      if (Args[i].isSExt)
+        Flags.setSExt();
+      if (Args[i].isInReg)
+        Flags.setInReg();
+      if (Args[i].isSRet)
+        Flags.setSRet();
+      if (Args[i].isByVal) {
+        Flags.setByVal();
+        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
+        const Type *ElementTy = Ty->getElementType();
+        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
+        // For ByVal, the alignment should come from the front end.  The back
+        // end will guess if this info is not there, but there are cases it
+        // cannot get right.
+        if (Args[i].Alignment)
+          FrameAlign = Args[i].Alignment;
+        Flags.setByValAlign(FrameAlign);
+        Flags.setByValSize(FrameSize);
+      }
+      if (Args[i].isNest)
+        Flags.setNest();
+      Flags.setOrigAlign(OriginalAlignment);
+
+      EVT PartVT = getRegisterType(RetTy->getContext(), VT);
+      unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
+      SmallVector<SDValue, 4> Parts(NumParts);
+      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+      if (Args[i].isSExt)
+        ExtendKind = ISD::SIGN_EXTEND;
+      else if (Args[i].isZExt)
+        ExtendKind = ISD::ZERO_EXTEND;
+
+      getCopyToParts(DAG, dl, Order, Op, &Parts[0], NumParts,
+                     PartVT, ExtendKind);
+
+      for (unsigned j = 0; j != NumParts; ++j) {
+        // If it isn't the first piece, the alignment must be 1.
+        ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
+        if (NumParts > 1 && j == 0)
+          MyFlags.Flags.setSplit();
+        else if (j != 0)
+          MyFlags.Flags.setOrigAlign(1);
+
+        Outs.push_back(MyFlags);
+      }
+    }
+  }
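+  // For example, on a 32-bit target an i64 argument is broken by
+  // getCopyToParts into two i32 parts: the first part carries the split
+  // marker and the original ABI alignment, and the second has its OrigAlign
+  // forced to 1, since only the first piece needs the full alignment.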
+
+  // Handle the incoming return values from the call.
+  SmallVector<ISD::InputArg, 32> Ins;
+  SmallVector<EVT, 4> RetTys;
+  ComputeValueVTs(*this, RetTy, RetTys);
+  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+    EVT VT = RetTys[I];
+    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      ISD::InputArg MyFlags;
+      MyFlags.VT = RegisterVT;
+      MyFlags.Used = isReturnValueUsed;
+      if (RetSExt)
+        MyFlags.Flags.setSExt();
+      if (RetZExt)
+        MyFlags.Flags.setZExt();
+      if (isInreg)
+        MyFlags.Flags.setInReg();
+      Ins.push_back(MyFlags);
+    }
+  }
+
+  SmallVector<SDValue, 4> InVals;
+  Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+                    Outs, Ins, dl, DAG, InVals);
+
+  // Verify that the target's LowerCall behaved as expected.
+  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+         "LowerCall didn't return a valid chain!");
+  assert((!isTailCall || InVals.empty()) &&
+         "LowerCall emitted a return value for a tail call!");
+  assert((isTailCall || InVals.size() == Ins.size()) &&
+         "LowerCall didn't emit the correct number of values!");
+  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+          assert(InVals[i].getNode() &&
+                 "LowerCall emitted a null value!");
+          assert(Ins[i].VT == InVals[i].getValueType() &&
+                 "LowerCall emitted a value with the wrong type!");
+        });
+
+  // For a tail call, the return value is merely live-out and there aren't
+  // any nodes in the DAG representing it. Return a special value to
+  // indicate that a tail call has been emitted and no more Instructions
+  // should be processed in the current block.
+  if (isTailCall) {
+    DAG.setRoot(Chain);
+    return std::make_pair(SDValue(), SDValue());
+  }
+
+  // Collect the legal value parts into potentially illegal values
+  // that correspond to the original function's return values.
+  ISD::NodeType AssertOp = ISD::DELETED_NODE;
+  if (RetSExt)
+    AssertOp = ISD::AssertSext;
+  else if (RetZExt)
+    AssertOp = ISD::AssertZext;
+  SmallVector<SDValue, 4> ReturnValues;
+  unsigned CurReg = 0;
+  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+    EVT VT = RetTys[I];
+    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+
+    ReturnValues.push_back(getCopyFromParts(DAG, dl, Order, &InVals[CurReg],
+                                            NumRegs, RegisterVT, VT,
+                                            AssertOp));
+    CurReg += NumRegs;
+  }
+
+  // For a function returning void, there is no return value.  We can't create
+  // such a node, so we just return a null return value in that case, and
+  // nothing will actually look at the value.
+  if (ReturnValues.empty())
+    return std::make_pair(SDValue(), Chain);
+
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+                            DAG.getVTList(&RetTys[0], RetTys.size()),
+                            &ReturnValues[0], ReturnValues.size());
+  return std::make_pair(Res, Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+  if (Res.getNode())
+    Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  llvm_unreachable("LowerOperation not implemented for this target!");
+  return SDValue();
+}
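+// A minimal sketch of the expected target-side override (hypothetical
+// "MyTargetLowering" class and LowerGlobalAddress helper, not part of this
+// patch):
+//
+//   SDValue MyTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+//     switch (Op.getOpcode()) {
+//     case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+//     default: llvm_unreachable("Unhandled custom legalization!");
+//     }
+//     return SDValue();
+//   }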
+
+void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+  SDValue Op = getValue(V);
+  assert((Op.getOpcode() != ISD::CopyFromReg ||
+          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+         "Copy from a reg to the same reg!");
+  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+  RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+  SDValue Chain = DAG.getEntryNode();
+  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), SDNodeOrder, Chain, 0);
+  PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
+  // If this is the entry block, emit arguments.
+  Function &F = *LLVMBB->getParent();
+  SelectionDAG &DAG = SDB->DAG;
+  SDValue OldRoot = DAG.getRoot();
+  DebugLoc dl = SDB->getCurDebugLoc();
+  const TargetData *TD = TLI.getTargetData();
+  SmallVector<ISD::InputArg, 16> Ins;
+
+  // Check whether the function can return without sret-demotion.
+  SmallVector<EVT, 4> OutVTs;
+  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+  getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+                OutVTs, OutsFlags, TLI);
+  FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+  FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
+                                          OutVTs, OutsFlags, DAG);
+  if (!FLI.CanLowerReturn) {
+    // Put in an sret pointer parameter before all the other parameters.
+    SmallVector<EVT, 1> ValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+    // NOTE: Assuming that a pointer will never break down to more than one VT
+    // or more than one register.
+    ISD::ArgFlagsTy Flags;
+    Flags.setSRet();
+    EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
+    ISD::InputArg RetArg(Flags, RegisterVT, true);
+    Ins.push_back(RetArg);
+  }
+
+  // Set up the incoming argument description vector.
+  unsigned Idx = 1;
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+       I != E; ++I, ++Idx) {
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, I->getType(), ValueVTs);
+    bool isArgValueUsed = !I->use_empty();
+    for (unsigned Value = 0, NumValues = ValueVTs.size();
+         Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+      ISD::ArgFlagsTy Flags;
+      unsigned OriginalAlignment =
+        TD->getABITypeAlignment(ArgTy);
+
+      if (F.paramHasAttr(Idx, Attribute::ZExt))
+        Flags.setZExt();
+      if (F.paramHasAttr(Idx, Attribute::SExt))
+        Flags.setSExt();
+      if (F.paramHasAttr(Idx, Attribute::InReg))
+        Flags.setInReg();
+      if (F.paramHasAttr(Idx, Attribute::StructRet))
+        Flags.setSRet();
+      if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+        Flags.setByVal();
+        const PointerType *Ty = cast<PointerType>(I->getType());
+        const Type *ElementTy = Ty->getElementType();
+        unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+        unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
+        // For ByVal, the alignment should be passed from the front end.  The
+        // back end will guess if this info is not there, but there are cases
+        // it cannot get right.
+        if (F.getParamAlignment(Idx))
+          FrameAlign = F.getParamAlignment(Idx);
+        Flags.setByValAlign(FrameAlign);
+        Flags.setByValSize(FrameSize);
+      }
+      if (F.paramHasAttr(Idx, Attribute::Nest))
+        Flags.setNest();
+      Flags.setOrigAlign(OriginalAlignment);
+
+      EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+      unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+      for (unsigned i = 0; i != NumRegs; ++i) {
+        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+        if (NumRegs > 1 && i == 0)
+          MyFlags.Flags.setSplit();
+        // If it isn't the first piece, the alignment must be 1.
+        else if (i > 0)
+          MyFlags.Flags.setOrigAlign(1);
+        Ins.push_back(MyFlags);
+      }
+    }
+  }
+
+  // Call the target to set up the argument values.
+  SmallVector<SDValue, 8> InVals;
+  SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
+                                             F.isVarArg(), Ins,
+                                             dl, DAG, InVals);
+
+  // Verify that the target's LowerFormalArguments behaved as expected.
+  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+         "LowerFormalArguments didn't return a valid chain!");
+  assert(InVals.size() == Ins.size() &&
+         "LowerFormalArguments didn't emit the correct number of values!");
+  DEBUG({
+      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+        assert(InVals[i].getNode() &&
+               "LowerFormalArguments emitted a null value!");
+        assert(Ins[i].VT == InVals[i].getValueType() &&
+               "LowerFormalArguments emitted a value with the wrong type!");
+      }
+    });
+
+  // Update the DAG with the new chain value resulting from argument lowering.
+  DAG.setRoot(NewRoot);
+
+  // Set up the argument values.
+  unsigned i = 0;
+  Idx = 1;
+  if (!FLI.CanLowerReturn) {
+    // Create a virtual register for the sret pointer, and put in a copy
+    // from the sret argument into it.
+    SmallVector<EVT, 1> ValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+    EVT VT = ValueVTs[0];
+    EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+    SDValue ArgValue = getCopyFromParts(DAG, dl, 0, &InVals[0], 1,
+                                        RegVT, VT, AssertOp);
+
+    MachineFunction& MF = SDB->DAG.getMachineFunction();
+    MachineRegisterInfo& RegInfo = MF.getRegInfo();
+    unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+    FLI.DemoteRegister = SRetReg;
+    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
+                                    SRetReg, ArgValue);
+    DAG.setRoot(NewRoot);
+
+    // i indexes lowered arguments.  Bump it past the hidden sret argument.
+    // Idx indexes LLVM arguments.  Don't touch it.
+    ++i;
+  }
+
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+      ++I, ++Idx) {
+    SmallVector<SDValue, 4> ArgValues;
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, I->getType(), ValueVTs);
+    unsigned NumValues = ValueVTs.size();
+    for (unsigned Value = 0; Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+      unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+      if (!I->use_empty()) {
+        ISD::NodeType AssertOp = ISD::DELETED_NODE;
+        if (F.paramHasAttr(Idx, Attribute::SExt))
+          AssertOp = ISD::AssertSext;
+        else if (F.paramHasAttr(Idx, Attribute::ZExt))
+          AssertOp = ISD::AssertZext;
+
+        ArgValues.push_back(getCopyFromParts(DAG, dl, 0, &InVals[i],
+                                             NumParts, PartVT, VT,
+                                             AssertOp));
+      }
+
+      i += NumParts;
+    }
+
+    if (!I->use_empty()) {
+      SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+                                       SDB->getCurDebugLoc());
+      SDB->setValue(I, Res);
+
+      // If this argument is live outside of the entry block, insert a copy
+      // from wherever we got it to the vreg that other BBs will use to
+      // reference it.
+      SDB->CopyToExportRegsIfNeeded(I);
+    }
+  }
+
+  assert(i == InVals.size() && "Argument register count mismatch!");
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  // FIXME: this should insert code into the DAG!
+  EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction());
+}
+
+/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
+/// ensure constants are generated when needed.  Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input.  We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB.  As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      unsigned Reg;
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = SDB->ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo->CreateRegForValue(C);
+          SDB->CopyValueToVirtualRegister(C, RegOut);
+        }
+        Reg = RegOut;
+      } else {
+        Reg = FuncInfo->ValueMap[PHIOp];
+        if (Reg == 0) {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo->CreateRegForValue(PHIOp);
+          SDB->CopyValueToVirtualRegister(PHIOp, Reg);
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        EVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+          SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+        Reg += NumRegisters;
+      }
+    }
+  }
+  SDB->ConstantsOut.clear();
+}
+
+/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
+/// supports legal types, and it emits MachineInstrs directly instead of
+/// creating SelectionDAG nodes.
+///
+bool
+SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
+                                                      FastISel *F) {
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+  unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size();
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      // Only handle legal types. Two interesting things to note here. First,
+      // by bailing out early, we may leave behind some dead instructions,
+      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+      // own moves. Second, this check is necessary because FastISel doesn't
+      // use CreateRegForValue to create registers, so it always creates
+      // exactly one register for each non-void instruction.
+      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+        // Promote MVT::i1.
+        if (VT == MVT::i1)
+          VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
+        else {
+          SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+          return false;
+        }
+      }
+
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      unsigned Reg = F->getRegForValue(PHIOp);
+      if (Reg == 0) {
+        SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+        return false;
+      }
+      SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+    }
+  }
+
+  return true;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 0000000..bc4b33d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,502 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineRegisterInfo;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDISelAsmOperandInfo;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+/// Also, targets can overload any lowering method.
+///
+class SelectionDAGBuilder {
+  MachineBasicBlock *CurMBB;
+
+  /// CurDebugLoc - current file + line number.  Changes as we build the DAG.
+  DebugLoc CurDebugLoc;
+
+  DenseMap<const Value*, SDValue> NodeMap;
+
+public:
+  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
+  /// them up and then emit token factor nodes when possible.  This allows us to
+  /// get simple disambiguation between loads without worrying about alias
+  /// analysis.
+  SmallVector<SDValue, 8> PendingLoads;
+private:
+
+  /// PendingExports - CopyToReg nodes that copy values to virtual registers
+  /// for export to other blocks need to be emitted before any terminator
+  /// instruction, but they have no other ordering requirements. We bunch them
+  /// up and the emit a single tokenfactor for them just before terminator
+  /// instructions.
+  SmallVector<SDValue, 8> PendingExports;
+
+  /// SDNodeOrder - A unique monotonically increasing number used to order the
+  /// SDNodes we create.
+  unsigned SDNodeOrder;
+
+  /// Case - A struct to record the Value for a switch case, and the
+  /// case's target basic block.
+  struct Case {
+    Constant* Low;
+    Constant* High;
+    MachineBasicBlock* BB;
+
+    Case() : Low(0), High(0), BB(0) { }
+    Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+      Low(low), High(high), BB(bb) { }
+    APInt size() const {
+      const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+      const APInt &rLow  = cast<ConstantInt>(Low)->getValue();
+      return (rHigh - rLow + 1ULL);
+    }
+  };
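+  // For example, a clustered case covering the values 3..7 has Low = 3 and
+  // High = 7, so size() returns (7 - 3 + 1) = 5.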
+
+  struct CaseBits {
+    uint64_t Mask;
+    MachineBasicBlock* BB;
+    unsigned Bits;
+
+    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+      Mask(mask), BB(bb), Bits(bits) { }
+  };
+
+  typedef std::vector<Case>           CaseVector;
+  typedef std::vector<CaseBits>       CaseBitsVector;
+  typedef CaseVector::iterator        CaseItr;
+  typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+  /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+  /// of conditional branches.
+  struct CaseRec {
+    CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+    CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+    /// CaseBB - The MBB in which to emit the compare and branch
+    MachineBasicBlock *CaseBB;
+    /// LT, GE - If nonzero, we know the current case value must be less-than or
+    /// greater-than-or-equal-to these Constants.
+    Constant *LT;
+    Constant *GE;
+    /// Range - A pair of iterators representing the range of case values to be
+    /// processed at this point in the binary search tree.
+    CaseRange Range;
+  };
+
+  typedef std::vector<CaseRec> CaseRecVector;
+
+  /// The comparison function for sorting the switch case values in the vector.
+  /// WARNING: Case ranges should be disjoint!
+  struct CaseCmp {
+    bool operator()(const Case &C1, const Case &C2) {
+      assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+      const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+      const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+      return CI1->getValue().slt(CI2->getValue());
+    }
+  };
+
+  struct CaseBitsCmp {
+    bool operator()(const CaseBits &C1, const CaseBits &C2) {
+      return C1.Bits > C2.Bits;
+    }
+  };
+
+  size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+
+  /// CaseBlock - This structure is used to communicate between
+  /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+  /// blocks needed by multi-case switch statements.
+  struct CaseBlock {
+    CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
+              MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+              MachineBasicBlock *me)
+      : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+        TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+    // CC - the condition code to use for the case block's setcc node
+    ISD::CondCode CC;
+    // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+    // Emit by default LHS op RHS. MHS is used for range comparisons:
+    // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+    Value *CmpLHS, *CmpMHS, *CmpRHS;
+    // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+    MachineBasicBlock *TrueBB, *FalseBB;
+    // ThisBB - the block into which to emit the code for the setcc and branches
+    MachineBasicBlock *ThisBB;
+  };
+  struct JumpTable {
+    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+  
+    /// Reg - the virtual register containing the index of the jump table
+    /// entry to jump to.
+    unsigned Reg;
+    /// JTI - the JumpTableIndex for this jump table in the function.
+    unsigned JTI;
+    /// MBB - the MBB into which to emit the code for the indirect jump.
+    MachineBasicBlock *MBB;
+    /// Default - the MBB of the default bb, which is a successor of the range
+    /// check MBB.  This is used when updating PHI nodes in successors.
+    MachineBasicBlock *Default;
+  };
+  struct JumpTableHeader {
+    JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H,
+                    bool E = false):
+      First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+    APInt First;
+    APInt Last;
+    Value *SValue;
+    MachineBasicBlock *HeaderBB;
+    bool Emitted;
+  };
+  typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+  struct BitTestCase {
+    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+      Mask(M), ThisBB(T), TargetBB(Tr) { }
+    uint64_t Mask;
+    MachineBasicBlock *ThisBB;
+    MachineBasicBlock *TargetBB;
+  };
+
+  typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+  struct BitTestBlock {
+    BitTestBlock(APInt F, APInt R, Value* SV,
+                 unsigned Rg, bool E,
+                 MachineBasicBlock* P, MachineBasicBlock* D,
+                 const BitTestInfo& C):
+      First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+      Parent(P), Default(D), Cases(C) { }
+    APInt First;
+    APInt Range;
+    Value  *SValue;
+    unsigned Reg;
+    bool Emitted;
+    MachineBasicBlock *Parent;
+    MachineBasicBlock *Default;
+    BitTestInfo Cases;
+  };
+
+public:
+  // TLI - This is information that describes the available target features we
+  // need for lowering.  This indicates when operations are unavailable,
+  // implemented with a libcall, etc.
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+  const TargetData *TD;
+  AliasAnalysis *AA;
+
+  /// SwitchCases - Vector of CaseBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<CaseBlock> SwitchCases;
+  /// JTCases - Vector of JumpTable structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<JumpTableBlock> JTCases;
+  /// BitTestCases - Vector of BitTestBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<BitTestBlock> BitTestCases;
+
+  /// PHINodesToUpdate - A list of phi instructions whose operand list will
+  /// be updated after processing the current basic block.
+  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+
+  /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to
+  /// scheduler custom lowering), track the change here.
+  DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping;
+
+  // Emit PHI-node-operand constants only once even if used by multiple
+  // PHI nodes.
+  DenseMap<Constant*, unsigned> ConstantsOut;
+
+  /// FuncInfo - Information about the function as a whole.
+  ///
+  FunctionLoweringInfo &FuncInfo;
+
+  /// OptLevel - What optimization level we're generating code for.
+  /// 
+  CodeGenOpt::Level OptLevel;
+  
+  /// GFI - Garbage collection metadata for the function.
+  GCFunctionInfo *GFI;
+
+  /// HasTailCall - This is set to true if a call in the current
+  /// block has been translated as a tail call. In this case,
+  /// no subsequent DAG nodes should be created.
+  ///
+  bool HasTailCall;
+
+  LLVMContext *Context;
+
+  SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli,
+                      FunctionLoweringInfo &funcinfo,
+                      CodeGenOpt::Level ol)
+    : CurDebugLoc(DebugLoc::getUnknownLoc()), SDNodeOrder(0),
+      TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+      HasTailCall(false),
+      Context(dag.getContext()) {
+  }
+
+  void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+  /// clear - Clear out the current SelectionDAG and the associated
+  /// state and prepare this SelectionDAGBuilder object to be used
+  /// for a new block. This doesn't clear out information about
+  /// additional blocks that are needed to complete switch lowering
+  /// or PHI node updating; that information is cleared out as it is
+  /// consumed.
+  void clear();
+
+  /// getRoot - Return the current virtual root of the Selection DAG,
+  /// flushing any PendingLoad items. This must be done before emitting
+  /// a store or any other node that may need to be ordered after any
+  /// prior load instructions.
+  ///
+  SDValue getRoot();
+
+  /// getControlRoot - Similar to getRoot, but instead of flushing all the
+  /// PendingLoad items, flush all the PendingExports items. It is necessary
+  /// to do this before emitting a terminator instruction.
+  ///
+  SDValue getControlRoot();
+
+  DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+  void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; }
+
+  unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+  void CopyValueToVirtualRegister(Value *V, unsigned Reg);
+
+  /// AssignOrderingToNode - Assign an ordering to the node. The ordering is
+  /// derived from the order in which the code appeared in the source. The
+  /// ordering is used by the scheduler to effectively turn off scheduling.
+  void AssignOrderingToNode(const SDNode *Node);
+
+  void visit(Instruction &I);
+
+  void visit(unsigned Opcode, User &I);
+
+  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+
+  SDValue getValue(const Value *V);
+
+  void setValue(const Value *V, SDValue NewN) {
+    SDValue &N = NodeMap[V];
+    assert(N.getNode() == 0 && "Already set a value for this node!");
+    N = NewN;
+  }
+  
+  void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                            std::set<unsigned> &OutputRegs, 
+                            std::set<unsigned> &InputRegs);
+
+  void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+                            unsigned Opc);
+  void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB,
+                                    MachineBasicBlock *FBB,
+                                    MachineBasicBlock *CurBB);
+  bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+  bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
+  void CopyToExportRegsIfNeeded(Value *V);
+  void ExportFromCurrentBlock(Value *V);
+  void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall,
+                   MachineBasicBlock *LandingPad = NULL);
+
+private:
+  // Terminator instructions.
+  void visitRet(ReturnInst &I);
+  void visitBr(BranchInst &I);
+  void visitSwitch(SwitchInst &I);
+  void visitIndirectBr(IndirectBrInst &I);
+  void visitUnreachable(UnreachableInst &I) { /* noop */ }
+
+  // Helpers for visitSwitch
+  bool handleSmallSwitchRange(CaseRec& CR,
+                              CaseRecVector& WorkList,
+                              Value* SV,
+                              MachineBasicBlock* Default);
+  bool handleJTSwitchCase(CaseRec& CR,
+                          CaseRecVector& WorkList,
+                          Value* SV,
+                          MachineBasicBlock* Default);
+  bool handleBTSplitSwitchCase(CaseRec& CR,
+                               CaseRecVector& WorkList,
+                               Value* SV,
+                               MachineBasicBlock* Default);
+  bool handleBitTestsSwitchCase(CaseRec& CR,
+                                CaseRecVector& WorkList,
+                                Value* SV,
+                                MachineBasicBlock* Default);  
+public:
+  void visitSwitchCase(CaseBlock &CB);
+  void visitBitTestHeader(BitTestBlock &B);
+  void visitBitTestCase(MachineBasicBlock* NextMBB,
+                        unsigned Reg,
+                        BitTestCase &B);
+  void visitJumpTable(JumpTable &JT);
+  void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH);
+  
+private:
+  // These all get lowered before this pass.
+  void visitInvoke(InvokeInst &I);
+  void visitUnwind(UnwindInst &I);
+
+  void visitBinary(User &I, unsigned OpCode);
+  void visitShift(User &I, unsigned Opcode);
+  void visitAdd(User &I)  { visitBinary(I, ISD::ADD); }
+  void visitFAdd(User &I) { visitBinary(I, ISD::FADD); }
+  void visitSub(User &I)  { visitBinary(I, ISD::SUB); }
+  void visitFSub(User &I);
+  void visitMul(User &I)  { visitBinary(I, ISD::MUL); }
+  void visitFMul(User &I) { visitBinary(I, ISD::FMUL); }
+  void visitURem(User &I) { visitBinary(I, ISD::UREM); }
+  void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
+  void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
+  void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
+  void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
+  void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
+  void visitAnd (User &I) { visitBinary(I, ISD::AND); }
+  void visitOr  (User &I) { visitBinary(I, ISD::OR); }
+  void visitXor (User &I) { visitBinary(I, ISD::XOR); }
+  void visitShl (User &I) { visitShift(I, ISD::SHL); }
+  void visitLShr(User &I) { visitShift(I, ISD::SRL); }
+  void visitAShr(User &I) { visitShift(I, ISD::SRA); }
+  void visitICmp(User &I);
+  void visitFCmp(User &I);
+  // Visit the conversion instructions
+  void visitTrunc(User &I);
+  void visitZExt(User &I);
+  void visitSExt(User &I);
+  void visitFPTrunc(User &I);
+  void visitFPExt(User &I);
+  void visitFPToUI(User &I);
+  void visitFPToSI(User &I);
+  void visitUIToFP(User &I);
+  void visitSIToFP(User &I);
+  void visitPtrToInt(User &I);
+  void visitIntToPtr(User &I);
+  void visitBitCast(User &I);
+
+  void visitExtractElement(User &I);
+  void visitInsertElement(User &I);
+  void visitShuffleVector(User &I);
+
+  void visitExtractValue(ExtractValueInst &I);
+  void visitInsertValue(InsertValueInst &I);
+
+  void visitGetElementPtr(User &I);
+  void visitSelect(User &I);
+
+  void visitAlloca(AllocaInst &I);
+  void visitLoad(LoadInst &I);
+  void visitStore(StoreInst &I);
+  void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
+  void visitCall(CallInst &I);
+  bool visitMemCmpCall(CallInst &I);
+  
+  void visitInlineAsm(CallSite CS);
+  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
+  void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
+
+  void visitPow(CallInst &I);
+  void visitExp2(CallInst &I);
+  void visitExp(CallInst &I);
+  void visitLog(CallInst &I);
+  void visitLog2(CallInst &I);
+  void visitLog10(CallInst &I);
+
+  void visitVAStart(CallInst &I);
+  void visitVAArg(VAArgInst &I);
+  void visitVAEnd(CallInst &I);
+  void visitVACopy(CallInst &I);
+
+  void visitUserOp1(Instruction &I) {
+    llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+  }
+  void visitUserOp2(Instruction &I) {
+    llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+  }
+  
+  const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
+  const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000..da2e6e4
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,1462 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+          cl::desc("Enable verbose messages in the \"fast\" "
+                   "instruction selector"));
+static cl::opt<bool>
+EnableFastISelAbort("fast-isel-abort", cl::Hidden,
+          cl::desc("Enable abort calls when \"fast\" instruction "
+                   "selection fails"));
+static cl::opt<bool>
+SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
+                  cl::desc("Schedule copies of livein registers"),
+                  cl::init(false));
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+          cl::desc("Pop up a window to show dags before the first "
+                   "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+          cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+          cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+          cl::desc("Pop up a window to show dags before the second "
+                   "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+          cl::desc("Pop up a window to show dags before the post legalize types"
+                   " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+          cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+          cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+      cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+                  ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+                  ViewDAGCombine2 = false,
+                  ViewDAGCombineLT = false,
+                  ViewISelDAGs = false, ViewSchedDAGs = false,
+                  ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+               RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+            cl::init(&createDefaultScheduler),
+            cl::desc("Instruction schedulers available (before register"
+                     " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+                        createDefaultScheduler);
+
+namespace llvm {
+  //===--------------------------------------------------------------------===//
+  /// createDefaultScheduler - This creates an instruction scheduler appropriate
+  /// for the target.
+  ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+                                             CodeGenOpt::Level OptLevel) {
+    const TargetLowering &TLI = IS->getTargetLowering();
+
+    if (OptLevel == CodeGenOpt::None)
+      return createFastDAGScheduler(IS, OptLevel);
+    if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
+      return createTDListDAGScheduler(IS, OptLevel);
+    assert(TLI.getSchedulingPreference() ==
+           TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
+    return createBURRListDAGScheduler(IS, OptLevel);
+  }
+}
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomInserter' flag.  These
+// instructions are special in various ways, which require special support to
+// insert.  The specified MachineInstr is created but not inserted into any
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+// DenseMap.
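+//
+// A hypothetical target override (illustration only; "Foo" is not a real
+// target) would look roughly like:
+//
+//   MachineBasicBlock *FooTargetLowering::EmitInstrWithCustomInserter(
+//       MachineInstr *MI, MachineBasicBlock *MBB,
+//       DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+//     switch (MI->getOpcode()) {
+//     default: llvm_unreachable("Unexpected instr type to insert");
+//     case Foo::SELECT_CC:
+//       // Split MBB, build the control-flow diamond, and record any
+//       // <OldSucc, NewSucc> pairs in *EM before returning the block
+//       // in which to continue inserting.
+//       ...
+//     }
+//   }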
+MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                         MachineBasicBlock *MBB,
+                   DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+#ifndef NDEBUG
+  dbgs() << "If a target marks an instruction with "
+          "'usesCustomInserter', it must implement "
+          "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+  llvm_unreachable(0);
+  return 0;
+}
+
+/// EmitLiveInCopy - Emit a copy for a live-in physical register. If the
+/// physical register has only a single copy use, then coalesce the copy
+/// if possible.
+static void EmitLiveInCopy(MachineBasicBlock *MBB,
+                           MachineBasicBlock::iterator &InsertPos,
+                           unsigned VirtReg, unsigned PhysReg,
+                           const TargetRegisterClass *RC,
+                           DenseMap<MachineInstr*, unsigned> &CopyRegMap,
+                           const MachineRegisterInfo &MRI,
+                           const TargetRegisterInfo &TRI,
+                           const TargetInstrInfo &TII) {
+  unsigned NumUses = 0;
+  MachineInstr *UseMI = NULL;
+  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
+         UE = MRI.use_end(); UI != UE; ++UI) {
+    UseMI = &*UI;
+    if (++NumUses > 1)
+      break;
+  }
+
+  // If the number of uses is not one, or the use is not a move instruction,
+  // don't coalesce. Also, only coalesce away a virtual-to-virtual register
+  // copy (the destination must be a virtual register).
+  bool Coalesced = false;
+  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+  if (NumUses == 1 &&
+      TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+      TargetRegisterInfo::isVirtualRegister(DstReg)) {
+    VirtReg = DstReg;
+    Coalesced = true;
+  }
+
+  // Now find an ideal location to insert the copy.
+  MachineBasicBlock::iterator Pos = InsertPos;
+  while (Pos != MBB->begin()) {
+    MachineInstr *PrevMI = prior(Pos);
+    DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
+    // copyRegToReg might emit multiple instructions to do a copy.
+    unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
+    if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
+      // This is what the BB looks like right now:
+      // r1024 = mov r0
+      // ...
+      // r1    = mov r1024
+      //
+      // We want to insert "r1025 = mov r1". Inserting this copy below the
+      // move to r1024 makes it impossible for that move to be coalesced.
+      //
+      // r1025 = mov r1
+      // r1024 = mov r0
+      // ...
+      // r1    = mov r1024
+      // r2    = mov r1025
+      break; // Woot! Found a good location.
+    --Pos;
+  }
+
+  bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
+  assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+  (void) Emitted;
+
+  CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
+  if (Coalesced) {
+    if (&*InsertPos == UseMI) ++InsertPos;
+    MBB->erase(UseMI);
+  }
+}
+
+/// EmitLiveInCopies - If this is the first basic block in the function,
+/// and if it has live ins that need to be copied into vregs, emit the
+/// copies into the block.
+static void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+                             const MachineRegisterInfo &MRI,
+                             const TargetRegisterInfo &TRI,
+                             const TargetInstrInfo &TII) {
+  if (SchedLiveInCopies) {
+    // Emit the copies at a heuristically-determined location in the block.
+    DenseMap<MachineInstr*, unsigned> CopyRegMap;
+    MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
+    for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+           E = MRI.livein_end(); LI != E; ++LI)
+      if (LI->second) {
+        const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
+        EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
+                       RC, CopyRegMap, MRI, TRI, TII);
+      }
+  } else {
+    // Emit the copies into the top of the block.
+    for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+           E = MRI.livein_end(); LI != E; ++LI)
+      if (LI->second) {
+        const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
+        bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
+                                        LI->second, LI->first, RC, RC);
+        assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+        (void) Emitted;
+      }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) :
+  MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
+  FuncInfo(new FunctionLoweringInfo(TLI)),
+  CurDAG(new SelectionDAG(TLI, *FuncInfo)),
+  SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)),
+  GFI(),
+  OptLevel(OL),
+  DAGSize(0)
+{}
+
+SelectionDAGISel::~SelectionDAGISel() {
+  delete SDB;
+  delete CurDAG;
+  delete FuncInfo;
+}
+
+unsigned SelectionDAGISel::MakeReg(EVT VT) {
+  return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AliasAnalysis>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addRequired<GCModuleInfo>();
+  AU.addPreserved<GCModuleInfo>();
+  AU.addRequired<DwarfWriter>();
+  AU.addPreserved<DwarfWriter>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+  Function &Fn = *mf.getFunction();
+
+  // Do some sanity-checking on the command-line options.
+  assert((!EnableFastISelVerbose || EnableFastISel) &&
+         "-fast-isel-verbose requires -fast-isel");
+  assert((!EnableFastISelAbort || EnableFastISel) &&
+         "-fast-isel-abort requires -fast-isel");
+
+  // Get alias analysis for load/store combining.
+  AA = &getAnalysis<AliasAnalysis>();
+
+  MF = &mf;
+  const TargetInstrInfo &TII = *TM.getInstrInfo();
+  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+  if (Fn.hasGC())
+    GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn);
+  else
+    GFI = 0;
+  RegInfo = &MF->getRegInfo();
+  DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+  MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+  CurDAG->init(*MF, MMI, DW);
+  FuncInfo->set(Fn, *MF, EnableFastISel);
+  SDB->init(GFI, *AA);
+
+  for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+    if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
+      // Mark landing pad.
+      FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+
+  SelectAllBasicBlocks(Fn, *MF, MMI, DW, TII);
+
+  // If the first basic block in the function has live ins that need to be
+  // copied into vregs, emit the copies into the top of the block before
+  // emitting the code for the block.
+  EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII);
+
+  // Add function live-ins to entry block live-in set.
+  for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(),
+         E = RegInfo->livein_end(); I != E; ++I)
+    MF->begin()->addLiveIn(I->first);
+
+#ifndef NDEBUG
+  assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() &&
+         "Not all catch info was assigned to a landing pad!");
+#endif
+
+  FuncInfo->clear();
+
+  return true;
+}
+
+/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is
+/// attached to this instruction.
+static void SetDebugLoc(unsigned MDDbgKind, Instruction *I,
+                        SelectionDAGBuilder *SDB,
+                        FastISel *FastIS, MachineFunction *MF) {
+  if (isa<DbgInfoIntrinsic>(I)) return;
+  
+  if (MDNode *Dbg = I->getMetadata(MDDbgKind)) {
+    DILocation DILoc(Dbg);
+    DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
+
+    SDB->setCurDebugLoc(Loc);
+
+    if (FastIS)
+      FastIS->setCurDebugLoc(Loc);
+
+    // If the function doesn't have a default debug location yet, set
+    // it. This is kind of a hack.
+    if (MF->getDefaultDebugLoc().isUnknown())
+      MF->setDefaultDebugLoc(Loc);
+  }
+}
+
+/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown.
+static void ResetDebugLoc(SelectionDAGBuilder *SDB, FastISel *FastIS) {
+  SDB->setCurDebugLoc(DebugLoc::getUnknownLoc());
+  if (FastIS)
+    FastIS->setCurDebugLoc(DebugLoc::getUnknownLoc());
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
+                                        BasicBlock::iterator Begin,
+                                        BasicBlock::iterator End,
+                                        bool &HadTailCall) {
+  SDB->setCurrentBasicBlock(BB);
+  unsigned MDDbgKind = LLVMBB->getContext().getMDKindID("dbg");
+
+  // Lower all of the non-terminator instructions. If a call is emitted
+  // as a tail call, cease emitting nodes for this block.
+  for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
+    SetDebugLoc(MDDbgKind, I, SDB, 0, MF);
+
+    if (!isa<TerminatorInst>(I)) {
+      SDB->visit(*I);
+
+      // Set the current debug location back to "unknown" so that it doesn't
+      // spuriously apply to subsequent instructions.
+      ResetDebugLoc(SDB, 0);
+    }
+  }
+
+  if (!SDB->HasTailCall) {
+    // Ensure that all instructions which are used outside of their defining
+    // blocks are available as virtual registers.  Invoke is handled elsewhere.
+    for (BasicBlock::iterator I = Begin; I != End; ++I)
+      if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
+        SDB->CopyToExportRegsIfNeeded(I);
+
+    // Handle PHI nodes in successor blocks.
+    if (End == LLVMBB->end()) {
+      HandlePHINodesInSuccessorBlocks(LLVMBB);
+
+      // Lower the terminator after the copies are emitted.
+      SetDebugLoc(MDDbgKind, LLVMBB->getTerminator(), SDB, 0, MF);
+      SDB->visit(*LLVMBB->getTerminator());
+      ResetDebugLoc(SDB, 0);
+    }
+  }
+
+  // Make sure the root of the DAG is up-to-date.
+  CurDAG->setRoot(SDB->getControlRoot());
+
+  // Final step, emit the lowered DAG as machine code.
+  CodeGenAndEmitDAG();
+  HadTailCall = SDB->HasTailCall;
+  SDB->clear();
+}
+
+namespace {
+/// SDOPsWorkListRemover - This class is a DAGUpdateListener that removes any
+/// deleted nodes from the worklist.
+class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
+  SmallVector<SDNode*, 128> &Worklist;
+public:
+  SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl) : Worklist(wl) {}
+
+  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+                   Worklist.end());
+  }
+
+  virtual void NodeUpdated(SDNode *N) {
+    // Ignore updates.
+  }
+};
+}
+
+/// TrivialTruncElim - Eliminate some trivial nops that can result from
+/// ShrinkDemandedOps: (trunc (ext n)) -> n.
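+/// For example, (trunc:i32 (zero_extend:i64 n:i32)) becomes just n, since
+/// the truncate discards exactly the bits the extension added.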
+static bool TrivialTruncElim(SDValue Op,
+                             TargetLowering::TargetLoweringOpt &TLO) {
+  SDValue N0 = Op.getOperand(0);
+  EVT VT = Op.getValueType();
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+       N0.getOpcode() == ISD::SIGN_EXTEND ||
+       N0.getOpcode() == ISD::ANY_EXTEND) &&
+      N0.getOperand(0).getValueType() == VT) {
+    return TLO.CombineTo(Op, N0.getOperand(0));
+  }
+  return false;
+}
+
+/// ShrinkDemandedOps - A late transformation pass that shrinks expressions
+/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
+/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
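+/// For example, if only the low 8 bits of an i32 add are demanded, the add
+/// may be narrowed to an i8 add wrapped in an extension, when the target
+/// reports such casts as free.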
+void SelectionDAGISel::ShrinkDemandedOps() {
+  SmallVector<SDNode*, 128> Worklist;
+
+  // Add all the dag nodes to the worklist.
+  Worklist.reserve(CurDAG->allnodes_size());
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+       E = CurDAG->allnodes_end(); I != E; ++I)
+    Worklist.push_back(I);
+
+  TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+  while (!Worklist.empty()) {
+    SDNode *N = Worklist.pop_back_val();
+
+    if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+
+    // Run ShrinkDemandedOp on scalar binary operations.
+    if (N->getNumValues() == 1 &&
+        N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
+      unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+      APInt Demanded = APInt::getAllOnesValue(BitWidth);
+      APInt KnownZero, KnownOne;
+      if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+                                   KnownZero, KnownOne, TLO) ||
+          (N->getOpcode() == ISD::TRUNCATE &&
+           TrivialTruncElim(SDValue(N, 0), TLO))) {
+        // Revisit the node.
+        Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+                       Worklist.end());
+        Worklist.push_back(N);
+
+        // Replace the old value with the new one.
+        DEBUG(errs() << "\nReplacing "; 
+              TLO.Old.getNode()->dump(CurDAG);
+              errs() << "\nWith: ";
+              TLO.New.getNode()->dump(CurDAG);
+              errs() << '\n');
+
+        Worklist.push_back(TLO.New.getNode());
+
+        SDOPsWorkListRemover DeadNodes(Worklist);
+        CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+        if (TLO.Old.getNode()->use_empty()) {
+          for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+               i != e; ++i) {
+            SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); 
+            if (OpNode->hasOneUse()) {
+              Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                         OpNode), Worklist.end());
+              Worklist.push_back(OpNode);
+            }
+          }
+
+          Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                     TLO.Old.getNode()), Worklist.end());
+          CurDAG->DeleteNode(TLO.Old.getNode());
+        }
+      }
+    }
+  }
+}
+
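+/// ComputeLiveOutVRegInfo - Compute known-zero/known-one bits and the number
+/// of sign bits for values copied into virtual registers, and record them in
+/// FuncInfo->LiveOutRegInfo so that downstream blocks can exploit them.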
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+  SmallPtrSet<SDNode*, 128> VisitedNodes;
+  SmallVector<SDNode*, 128> Worklist;
+
+  Worklist.push_back(CurDAG->getRoot().getNode());
+
+  APInt Mask;
+  APInt KnownZero;
+  APInt KnownOne;
+
+  do {
+    SDNode *N = Worklist.pop_back_val();
+
+    // If we've already seen this node, ignore it.
+    if (!VisitedNodes.insert(N))
+      continue;
+
+    // Otherwise, add all chain operands to the worklist.
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other)
+        Worklist.push_back(N->getOperand(i).getNode());
+
+    // If this is a CopyToReg with a vreg dest, process it.
+    if (N->getOpcode() != ISD::CopyToReg)
+      continue;
+
+    unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+      continue;
+
+    // Ignore non-scalar or non-integer values.
+    SDValue Src = N->getOperand(2);
+    EVT SrcVT = Src.getValueType();
+    if (!SrcVT.isInteger() || SrcVT.isVector())
+      continue;
+
+    unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+    Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
+    CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+
+    // Only install this information if it tells us something.
+    if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
+      DestReg -= TargetRegisterInfo::FirstVirtualRegister;
+      if (DestReg >= FuncInfo->LiveOutRegInfo.size())
+        FuncInfo->LiveOutRegInfo.resize(DestReg+1);
+      FunctionLoweringInfo::LiveOutInfo &LOI =
+        FuncInfo->LiveOutRegInfo[DestReg];
+      LOI.NumSignBits = NumSignBits;
+      LOI.KnownOne = KnownOne;
+      LOI.KnownZero = KnownZero;
+    }
+  } while (!Worklist.empty());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+  std::string GroupName;
+  if (TimePassesIsEnabled)
+    GroupName = "Instruction Selection and Scheduling";
+  std::string BlockName;
+  if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+      ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+      ViewSUnitDAGs)
+    BlockName = MF->getFunction()->getNameStr() + ":" +
+                BB->getBasicBlock()->getNameStr();
+
+  DEBUG(dbgs() << "Initial selection DAG:\n");
+  DEBUG(CurDAG->dump());
+
+  if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+  // Run the DAG combiner in pre-legalize mode.
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("DAG Combining 1", GroupName);
+    CurDAG->Combine(Unrestricted, *AA, OptLevel);
+  } else {
+    CurDAG->Combine(Unrestricted, *AA, OptLevel);
+  }
+
+  DEBUG(dbgs() << "Optimized lowered selection DAG:\n");
+  DEBUG(CurDAG->dump());
+
+  // Second step, hack on the DAG until it only uses operations and types that
+  // the target supports.
+  if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+                                               BlockName);
+
+  bool Changed;
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("Type Legalization", GroupName);
+    Changed = CurDAG->LegalizeTypes();
+  } else {
+    Changed = CurDAG->LegalizeTypes();
+  }
+
+  DEBUG(dbgs() << "Type-legalized selection DAG:\n");
+  DEBUG(CurDAG->dump());
+
+  if (Changed) {
+    if (ViewDAGCombineLT)
+      CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+    // Run the DAG combiner in post-type-legalize mode.
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T("DAG Combining after legalize types", GroupName);
+      CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+    } else {
+      CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+    }
+
+    DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n");
+    DEBUG(CurDAG->dump());
+  }
+
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("Vector Legalization", GroupName);
+    Changed = CurDAG->LegalizeVectors();
+  } else {
+    Changed = CurDAG->LegalizeVectors();
+  }
+
+  if (Changed) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T("Type Legalization 2", GroupName);
+      CurDAG->LegalizeTypes();
+    } else {
+      CurDAG->LegalizeTypes();
+    }
+
+    if (ViewDAGCombineLT)
+      CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+    // Run the DAG combiner in post-type-legalize mode.
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
+      CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+    } else {
+      CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+    }
+
+    DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n");
+    DEBUG(CurDAG->dump());
+  }
+
+  if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("DAG Legalization", GroupName);
+    CurDAG->Legalize(OptLevel);
+  } else {
+    CurDAG->Legalize(OptLevel);
+  }
+
+  DEBUG(dbgs() << "Legalized selection DAG:\n");
+  DEBUG(CurDAG->dump());
+
+  if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+  // Run the DAG combiner in post-legalize mode.
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("DAG Combining 2", GroupName);
+    CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+  } else {
+    CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+  }
+
+  DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
+  DEBUG(CurDAG->dump());
+
+  if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
+  if (OptLevel != CodeGenOpt::None) {
+    ShrinkDemandedOps();
+    ComputeLiveOutVRegInfo();
+  }
+
+  // Third, instruction select all of the operations to machine code, adding the
+  // code to the MachineBasicBlock.
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("Instruction Selection", GroupName);
+    InstructionSelect();
+  } else {
+    InstructionSelect();
+  }
+
+  DEBUG(dbgs() << "Selected selection DAG:\n");
+  DEBUG(CurDAG->dump());
+
+  if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+
+  // Schedule machine code.
+  ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("Instruction Scheduling", GroupName);
+    Scheduler->Run(CurDAG, BB, BB->end());
+  } else {
+    Scheduler->Run(CurDAG, BB, BB->end());
+  }
+
+  if (ViewSUnitDAGs) Scheduler->viewGraph();
+
+  // Emit machine code to BB.  This can change 'BB' to the last block being
+  // inserted into.
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("Instruction Creation", GroupName);
+    BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
+  } else {
+    BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
+  }
+
+  // Free the scheduler state.
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName);
+    delete Scheduler;
+  } else {
+    delete Scheduler;
+  }
+
+  DEBUG(dbgs() << "Selected machine code:\n");
+  DEBUG(BB->dump());
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
+                                            MachineFunction &MF,
+                                            MachineModuleInfo *MMI,
+                                            DwarfWriter *DW,
+                                            const TargetInstrInfo &TII) {
+  // Initialize the Fast-ISel state, if needed.
+  FastISel *FastIS = 0;
+  if (EnableFastISel)
+    FastIS = TLI.createFastISel(MF, MMI, DW,
+                                FuncInfo->ValueMap,
+                                FuncInfo->MBBMap,
+                                FuncInfo->StaticAllocaMap
+#ifndef NDEBUG
+                                , FuncInfo->CatchInfoLost
+#endif
+                                );
+
+  unsigned MDDbgKind = Fn.getContext().getMDKindID("dbg");
+
+  // Iterate over all basic blocks in the function.
+  for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+    BasicBlock *LLVMBB = &*I;
+    BB = FuncInfo->MBBMap[LLVMBB];
+
+    BasicBlock::iterator const Begin = LLVMBB->begin();
+    BasicBlock::iterator const End = LLVMBB->end();
+    BasicBlock::iterator BI = Begin;
+
+    // Lower any arguments needed in this block if this is the entry block.
+    bool SuppressFastISel = false;
+    if (LLVMBB == &Fn.getEntryBlock()) {
+      LowerArguments(LLVMBB);
+
+      // If any of the arguments has the byval attribute, forgo
+      // fast-isel in the entry block.
+      if (FastIS) {
+        unsigned j = 1;
+        for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+             I != E; ++I, ++j)
+          if (Fn.paramHasAttr(j, Attribute::ByVal)) {
+            if (EnableFastISelVerbose || EnableFastISelAbort)
+              dbgs() << "FastISel skips entry block due to byval argument\n";
+            SuppressFastISel = true;
+            break;
+          }
+      }
+    }
+
+    if (MMI && BB->isLandingPad()) {
+      // Add a label to mark the beginning of the landing pad.  Deletion of the
+      // landing pad can thus be detected via the MachineModuleInfo.
+      unsigned LabelID = MMI->addLandingPad(BB);
+
+      const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL);
+      BuildMI(BB, SDB->getCurDebugLoc(), II).addImm(LabelID);
+
+      // Mark exception register as live in.
+      unsigned Reg = TLI.getExceptionAddressRegister();
+      if (Reg) BB->addLiveIn(Reg);
+
+      // Mark exception selector register as live in.
+      Reg = TLI.getExceptionSelectorRegister();
+      if (Reg) BB->addLiveIn(Reg);
+
+      // FIXME: Hack around an exception handling flaw (PR1508): the personality
+      // function and list of typeids logically belong to the invoke (or, if you
+      // like, the basic block containing the invoke), and need to be associated
+      // with it in the dwarf exception handling tables.  Currently however the
+      // information is provided by an intrinsic (eh.selector) that can be moved
+      // to unexpected places by the optimizers: if the unwind edge is critical,
+      // then breaking it can result in the intrinsics being in the successor of
+      // the landing pad, not the landing pad itself.  This results
+      // in exceptions not being caught because no typeids are associated with
+      // the invoke.  This may not be the only way things can go wrong, but it
+      // is the only case we try to work around for the moment.
+      BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+      if (Br && Br->isUnconditional()) { // Critical edge?
+        BasicBlock::iterator I, E;
+        for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+          if (isa<EHSelectorInst>(I))
+            break;
+
+        if (I == E)
+          // No catch info found - try to extract some from the successor.
+          CopyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo);
+      }
+    }
+
+    // Before doing SelectionDAG ISel, see if FastISel has been requested.
+    if (FastIS && !SuppressFastISel) {
+      // Emit code for any incoming arguments. This must happen before
+      // beginning FastISel on the entry block.
+      if (LLVMBB == &Fn.getEntryBlock()) {
+        CurDAG->setRoot(SDB->getControlRoot());
+        CodeGenAndEmitDAG();
+        SDB->clear();
+      }
+      FastIS->startNewBlock(BB);
+      // Do FastISel on as many instructions as possible.
+      for (; BI != End; ++BI) {
+        // Just before the terminator instruction, insert instructions to
+        // feed PHI nodes in successor blocks.
+        if (isa<TerminatorInst>(BI))
+          if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
+            ResetDebugLoc(SDB, FastIS);
+            if (EnableFastISelVerbose || EnableFastISelAbort) {
+              dbgs() << "FastISel miss: ";
+              BI->dump();
+            }
+            assert(!EnableFastISelAbort &&
+                   "FastISel didn't handle a PHI in a successor");
+            break;
+          }
+
+        SetDebugLoc(MDDbgKind, BI, SDB, FastIS, &MF);
+
+        // Try to select the instruction with FastISel.
+        if (FastIS->SelectInstruction(BI)) {
+          ResetDebugLoc(SDB, FastIS);
+          continue;
+        }
+
+        // Clear out the debug location so that it doesn't carry over to
+        // unrelated instructions.
+        ResetDebugLoc(SDB, FastIS);
+
+        // Then handle certain instructions as single-LLVM-Instruction blocks.
+        if (isa<CallInst>(BI)) {
+          if (EnableFastISelVerbose || EnableFastISelAbort) {
+            dbgs() << "FastISel missed call: ";
+            BI->dump();
+          }
+
+          if (!BI->getType()->isVoidTy()) {
+            unsigned &R = FuncInfo->ValueMap[BI];
+            if (!R)
+              R = FuncInfo->CreateRegForValue(BI);
+          }
+
+          bool HadTailCall = false;
+          SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall);
+
+          // If the call was emitted as a tail call, we're done with the block.
+          if (HadTailCall) {
+            BI = End;
+            break;
+          }
+
+          // If the instruction was codegen'd with multiple blocks,
+          // inform the FastISel object where to resume inserting.
+          FastIS->setCurrentBlock(BB);
+          continue;
+        }
+
+        // Otherwise, give up on FastISel for the rest of the block.
+        // For now, be a little lenient about non-branch terminators.
+        if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
+          if (EnableFastISelVerbose || EnableFastISelAbort) {
+            dbgs() << "FastISel miss: ";
+            BI->dump();
+          }
+          if (EnableFastISelAbort)
+            // The "fast" selector couldn't handle something and bailed.
+            // For the purpose of debugging, just abort.
+            llvm_unreachable("FastISel didn't select the entire block");
+        }
+        break;
+      }
+    }
+
+    // Run SelectionDAG instruction selection on the remainder of the block
+    // not handled by FastISel. If FastISel is not run, this is the entire
+    // block.
+    if (BI != End) {
+      bool HadTailCall;
+      SelectBasicBlock(LLVMBB, BI, End, HadTailCall);
+    }
+
+    FinishBasicBlock();
+  }
+
+  delete FastIS;
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+
+  DEBUG(dbgs() << "Target-post-processed machine code:\n");
+  DEBUG(BB->dump());
+
+  DEBUG(dbgs() << "Total amount of phi nodes to update: "
+               << SDB->PHINodesToUpdate.size() << "\n");
+  DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i)
+          dbgs() << "Node " << i << " : ("
+                 << SDB->PHINodesToUpdate[i].first
+                 << ", " << SDB->PHINodesToUpdate[i].second << ")\n");
+
+  // Next, now that we know the last MBB that the LLVM BB expanded into,
+  // update PHI nodes in successors.
+  if (SDB->SwitchCases.empty() &&
+      SDB->JTCases.empty() &&
+      SDB->BitTestCases.empty()) {
+    for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
+      MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
+      assert(PHI->isPHI() &&
+             "This is not a machine PHI node that we are updating!");
+      PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
+                                                false));
+      PHI->addOperand(MachineOperand::CreateMBB(BB));
+    }
+    SDB->PHINodesToUpdate.clear();
+    return;
+  }
+
+  for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
+    // Lower header first, if it wasn't already lowered
+    if (!SDB->BitTestCases[i].Emitted) {
+      // Set the current basic block to the mbb we wish to insert the code into
+      BB = SDB->BitTestCases[i].Parent;
+      SDB->setCurrentBasicBlock(BB);
+      // Emit the code
+      SDB->visitBitTestHeader(SDB->BitTestCases[i]);
+      CurDAG->setRoot(SDB->getRoot());
+      CodeGenAndEmitDAG();
+      SDB->clear();
+    }
+
+    for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+      // Set the current basic block to the mbb we wish to insert the code into
+      BB = SDB->BitTestCases[i].Cases[j].ThisBB;
+      SDB->setCurrentBasicBlock(BB);
+      // Emit the code
+      if (j+1 != ej)
+        SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+                              SDB->BitTestCases[i].Reg,
+                              SDB->BitTestCases[i].Cases[j]);
+      else
+        SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+                              SDB->BitTestCases[i].Reg,
+                              SDB->BitTestCases[i].Cases[j]);
+
+      CurDAG->setRoot(SDB->getRoot());
+      CodeGenAndEmitDAG();
+      SDB->clear();
+    }
+
+    // Update PHI Nodes
+    for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
+      MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
+      MachineBasicBlock *PHIBB = PHI->getParent();
+      assert(PHI->isPHI() &&
+             "This is not a machine PHI node that we are updating!");
+      // This is "default" BB. We have two jumps to it. From "header" BB and
+      // from last "case" BB.
+      if (PHIBB == SDB->BitTestCases[i].Default) {
+        PHI->addOperand(MachineOperand::
+                        CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+        PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
+        PHI->addOperand(MachineOperand::
+                        CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+        PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
+                                                  back().ThisBB));
+      }
+      // One of "cases" BB.
+      for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
+           j != ej; ++j) {
+        MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+        if (cBB->isSuccessor(PHIBB)) {
+          PHI->addOperand(MachineOperand::
+                          CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+          PHI->addOperand(MachineOperand::CreateMBB(cBB));
+        }
+      }
+    }
+  }
+  SDB->BitTestCases.clear();
+
+  // If the JumpTable record is filled in, then we need to emit a jump table.
+  // Updating the PHI nodes is tricky in this case, since we need to determine
+  // whether the PHI is a successor of the range check MBB or the jump
+  // table MBB.
+  for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+    // Lower header first, if it wasn't already lowered
+    if (!SDB->JTCases[i].first.Emitted) {
+      // Set the current basic block to the mbb we wish to insert the code into
+      BB = SDB->JTCases[i].first.HeaderBB;
+      SDB->setCurrentBasicBlock(BB);
+      // Emit the code
+      SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first);
+      CurDAG->setRoot(SDB->getRoot());
+      CodeGenAndEmitDAG();
+      SDB->clear();
+    }
+
+    // Set the current basic block to the mbb we wish to insert the code into
+    BB = SDB->JTCases[i].second.MBB;
+    SDB->setCurrentBasicBlock(BB);
+    // Emit the code
+    SDB->visitJumpTable(SDB->JTCases[i].second);
+    CurDAG->setRoot(SDB->getRoot());
+    CodeGenAndEmitDAG();
+    SDB->clear();
+
+    // Update PHI Nodes
+    for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
+      MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
+      MachineBasicBlock *PHIBB = PHI->getParent();
+      assert(PHI->isPHI() &&
+             "This is not a machine PHI node that we are updating!");
+      // "default" BB. We can go there only from header BB.
+      if (PHIBB == SDB->JTCases[i].second.Default) {
+        PHI->addOperand
+          (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+        PHI->addOperand
+          (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
+      }
+      // JT BB. Just iterate over successors here
+      if (BB->isSuccessor(PHIBB)) {
+        PHI->addOperand
+          (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+        PHI->addOperand(MachineOperand::CreateMBB(BB));
+      }
+    }
+  }
+  SDB->JTCases.clear();
+
+  // If the switch block involved a branch to one of the actual successors, we
+  // need to update PHI nodes in that block.
+  for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
+    MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
+    assert(PHI->isPHI() &&
+           "This is not a machine PHI node that we are updating!");
+    if (BB->isSuccessor(PHI->getParent())) {
+      PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
+                                                false));
+      PHI->addOperand(MachineOperand::CreateMBB(BB));
+    }
+  }
+
+  // If we generated any switch lowering information, build and codegen any
+  // additional DAGs necessary.
+  for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+    // Set the current basic block to the mbb we wish to insert the code into
+    MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB;
+    SDB->setCurrentBasicBlock(BB);
+
+    // Emit the code
+    SDB->visitSwitchCase(SDB->SwitchCases[i]);
+    CurDAG->setRoot(SDB->getRoot());
+    CodeGenAndEmitDAG();
+
+    // Handle any PHI nodes in successors of this chunk, as if we were coming
+    // from the original BB before switch expansion.  Note that PHI nodes can
+    // occur multiple times in PHINodesToUpdate.  We have to be very careful to
+    // handle them the right number of times.
+    while ((BB = SDB->SwitchCases[i].TrueBB)) {  // Handle LHS and RHS.
+      // If new BB's are created during scheduling, the edges may have been
+      // updated. That is, the edge from ThisBB to BB may have been split and
+      // BB's predecessor is now another block.
+      DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI =
+        SDB->EdgeMapping.find(BB);
+      if (EI != SDB->EdgeMapping.end())
+        ThisBB = EI->second;
+
+      // BB may have been removed from the CFG if a branch was constant folded.
+      if (ThisBB->isSuccessor(BB)) {
+        for (MachineBasicBlock::iterator Phi = BB->begin();
+             Phi != BB->end() && Phi->isPHI();
+             ++Phi) {
+          // This value for this PHI node is recorded in PHINodesToUpdate.
+          for (unsigned pn = 0; ; ++pn) {
+            assert(pn != SDB->PHINodesToUpdate.size() &&
+                   "Didn't find PHI entry!");
+            if (SDB->PHINodesToUpdate[pn].first == Phi) {
+              Phi->addOperand(MachineOperand::
+                              CreateReg(SDB->PHINodesToUpdate[pn].second,
+                                        false));
+              Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
+              break;
+            }
+          }
+        }
+      }
+
+      // Don't process RHS if same block as LHS.
+      if (BB == SDB->SwitchCases[i].FalseBB)
+        SDB->SwitchCases[i].FalseBB = 0;
+
+      // If we haven't handled the RHS, do so now.  Otherwise, we're done.
+      SDB->SwitchCases[i].TrueBB = SDB->SwitchCases[i].FalseBB;
+      SDB->SwitchCases[i].FalseBB = 0;
+    }
+    assert(SDB->SwitchCases[i].TrueBB == 0 && SDB->SwitchCases[i].FalseBB == 0);
+    SDB->clear();
+  }
+  SDB->SwitchCases.clear();
+
+  SDB->PHINodesToUpdate.clear();
+}
+
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it; otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+  RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
+  if (!Ctor) {
+    Ctor = ISHeuristic;
+    RegisterScheduler::setDefault(Ctor);
+  }
+
+  return Ctor(this, OptLevel);
+}
+
+ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
+  return new ScheduleHazardRecognizer();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255).  If
+/// the dag combiner simplified the 255, we still want to match.  RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
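+///
+/// For example (illustrative values, not from this patch): if the pattern
+/// wants (and X, 255) but the combiner rewrote it to (and X, 254) because
+/// bit 0 of X is known zero, then NeededMask is 255 & ~254 = 1, and
+/// MaskedValueIsZero(X, 1) lets the pattern match anyway.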
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+                                    int64_t DesiredMaskS) const {
+  const APInt &ActualMask = RHS->getAPIntValue();
+  const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual AND mask allows bits outside the desired mask, this
+  // doesn't match.
+  if (ActualMask.intersects(~DesiredMask))
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.  Check for known zero input bits.
+  APInt NeededMask = DesiredMask & ~ActualMask;
+  if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255).  If
+/// the dag combiner simplified the 255, we still want to match.  RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
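+///
+/// For example (illustrative values, not from this patch): if the pattern
+/// wants (or X, 255) but the combiner rewrote it to (or X, 1) because bits
+/// 1-7 of X are known one, then NeededMask is 255 & ~1 = 254, and since
+/// those bits are KnownOne the pattern matches anyway.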
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+                                   int64_t DesiredMaskS) const {
+  const APInt &ActualMask = RHS->getAPIntValue();
+  const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual OR mask sets bits outside the desired mask, this
+  // doesn't match.
+  if (ActualMask.intersects(~DesiredMask))
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.  Check for known zero input bits.
+  APInt NeededMask = DesiredMask & ~ActualMask;
+
+  APInt KnownZero, KnownOne;
+  CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+  // If all the missing bits in the OR are already known to be set, match!
+  if ((NeededMask & KnownOne) == NeededMask)
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen.  Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+  std::vector<SDValue> InOps;
+  std::swap(InOps, Ops);
+
+  Ops.push_back(InOps[0]);  // input chain.
+  Ops.push_back(InOps[1]);  // input asm string.
+
+  unsigned i = 2, e = InOps.size();
+  if (InOps[e-1].getValueType() == MVT::Flag)
+    --e;  // Don't process a flag operand if it is here.
+
+  while (i != e) {
+    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
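+    // The low 3 bits of the flag word encode the operand kind (4 == MEM);
+    // the remaining bits encode how many operands follow, as decoded by
+    // InlineAsm::getNumOperandRegisters.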
+    if ((Flags & 7) != 4 /*MEM*/) {
+      // Just skip over this operand, copying the operands verbatim.
+      Ops.insert(Ops.end(), InOps.begin()+i,
+                 InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+      i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+    } else {
+      assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+             "Memory operand with multiple values?");
+      // Otherwise, this is a memory operand.  Ask the target to select it.
+      std::vector<SDValue> SelOps;
+      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) {
+        llvm_report_error("Could not match memory address.  Inline asm"
+                          " failure!");
+      }
+
+      // Add this to the output node.
+      Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3),
+                                              MVT::i32));
+      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+      i += 2;
+    }
+  }
+
+  // Add the flag input back if present.
+  if (e != InOps.size())
+    Ops.push_back(InOps.back());
+}
+
+/// findFlagUse - Return use of MVT::Flag value produced by the specified
+/// SDNode.
+///
+static SDNode *findFlagUse(SDNode *N) {
+  unsigned FlagResNo = N->getNumValues()-1;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDUse &Use = I.getUse();
+    if (Use.getResNo() == FlagResNo)
+      return Use.getUser();
+  }
+  return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+                          SDNode *Root,
+                          SmallPtrSet<SDNode*, 16> &Visited) {
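+  // Node ids are assigned in topological order, so a node numbered before
+  // Def cannot reach Def through its operands; together with the visited
+  // set this prunes the search.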
+  if (Use->getNodeId() < Def->getNodeId() ||
+      !Visited.insert(Use))
+    return false;
+
+  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+    SDNode *N = Use->getOperand(i).getNode();
+    if (N == Def) {
+      if (Use == ImmedUse || Use == Root)
+        continue;  // We are not looking for immediate use.
+      assert(N != Root);
+      return true;
+    }
+
+    // Traverse up the operand chain.
+    if (findNonImmUse(N, Def, ImmedUse, Root, Visited))
+      return true;
+  }
+  return false;
+}
+
+/// isNonImmUse - Start searching from Root up the DAG to check if Def can
+/// be reached. Return true if that's the case. However, ignore direct uses
+/// by ImmedUse (which would be U in the example illustrated in
+/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
+/// case).
+/// FIXME: to be really generic, we should allow direct use by any node
+/// that is being folded. But realistically, since we only fold loads that
+/// have one non-chain use, we only need to watch out for load/op/store
+/// and load/op/cmp case where the root (store / cmp) may reach the load via
+/// its chain operand.
+static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
+  SmallPtrSet<SDNode*, 16> Visited;
+  return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
+}
+
+/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root and
+/// folding N is profitable.
+bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+                                                  SDNode *Root) const {
+  if (OptLevel == CodeGenOpt::None) return false;
+
+  // If Root can somehow reach N through a path that doesn't contain U, then
+  // folding N would create a cycle. E.g. in the following diagram, Root can
+  // reach N through X. If N is folded into Root, then
+  // X is both a predecessor and a successor of U.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^     ^          //
+  //         \   /           //
+  //          \ /            //
+  //         [Root*]         //
+  //
+  // * indicates nodes to be folded together.
+  //
+  // If Root produces a flag, then it gets (even more) interesting. Since it
+  // will be "glued" together with its flag use in the scheduler, we need to
+  // check if it might reach N.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^       ^        //
+  //         \       \       //
+  //          \      |       //
+  //         [Root*] |       //
+  //          ^      |       //
+  //          f      |       //
+  //          |      /       //
+  //         [Y]    /        //
+  //           ^   /         //
+  //           f  /          //
+  //           | /           //
+  //          [FU]           //
+  //
+  // If FU (flag use) indirectly reaches N (the load), and Root folds N
+  // (call it Fold), then X is a predecessor of FU and a successor of
+  // Fold. But since Fold and FU are flagged together, this will create
+  // a cycle in the scheduling graph.
+
+  EVT VT = Root->getValueType(Root->getNumValues()-1);
+  while (VT == MVT::Flag) {
+    SDNode *FU = findFlagUse(Root);
+    if (FU == NULL)
+      break;
+    Root = FU;
+    VT = Root->getValueType(Root->getNumValues()-1);
+  }
+
+  return !isNonImmUse(Root, N, U);
+}
+
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+  std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+  SelectInlineAsmMemoryOperands(Ops);
+    
+  std::vector<EVT> VTs;
+  VTs.push_back(MVT::Other);
+  VTs.push_back(MVT::Flag);
+  SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+                                VTs, &Ops[0], Ops.size());
+  return New.getNode();
+}
+
+SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
+  return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
+}
+
+SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  unsigned C = cast<LabelSDNode>(N)->getLabelID();
+  SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
+  return CurDAG->SelectNodeTo(N, TargetOpcode::EH_LABEL,
+                              MVT::Other, Tmp, Chain);
+}
+
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+  std::string msg;
+  raw_string_ostream Msg(msg);
+  Msg << "Cannot yet select: ";
+  N->printrFull(Msg, CurDAG);
+  llvm_report_error(Msg.str());
+}
+
+void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) {
+  dbgs() << "Cannot yet select: ";
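+  // The intrinsic id is operand 0, or operand 1 when operand 0 is the
+  // incoming chain; the comparison below yields that index.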
+  unsigned iid =
+    cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() ==
+                                       MVT::Other))->getZExtValue();
+  if (iid < Intrinsic::num_intrinsics)
+    llvm_report_error("Cannot yet select: intrinsic %" +
+                      Intrinsic::getName((Intrinsic::ID)iid));
+  else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo())
+    llvm_report_error(Twine("Cannot yet select: target intrinsic %") +
+                      tii->getName(iid));
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 0000000..3786bd1
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,301 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+    explicit DOTGraphTraits(bool isSimple=false) :
+      DefaultDOTGraphTraits(isSimple) {}
+
+    static bool hasEdgeDestLabels() {
+      return true;
+    }
+
+    static unsigned numEdgeDestLabels(const void *Node) {
+      return ((const SDNode *) Node)->getNumValues();
+    }
+
+    static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+      return ((const SDNode *) Node)->getValueType(i).getEVTString();
+    }
+
+    template<typename EdgeIter>
+    static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+      return itostr(I - SDNodeIterator::begin((SDNode *) Node));
+    }
+
+    /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+    /// should actually target another edge source, not a node.  If this method
+    /// is implemented, getEdgeTarget should be implemented.
+    template<typename EdgeIter>
+    static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+      return true;
+    }
+
+    /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+    /// called to determine which outgoing edge of Node is the target of this
+    /// edge.
+    template<typename EdgeIter>
+    static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+      SDNode *TargetNode = *I;
+      SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+      std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+      return NI;
+    }
+
+    static std::string getGraphName(const SelectionDAG *G) {
+      return G->getMachineFunction().getFunction()->getName();
+    }
+
+    static bool renderGraphFromBottomUp() {
+      return true;
+    }
+
+    static bool hasNodeAddressLabel(const SDNode *Node,
+                                    const SelectionDAG *Graph) {
+      return true;
+    }
+
+    /// If you want to override the dot attributes printed for a particular
+    /// edge, override this method.
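+    /// Here, flag (MVT::Flag) edges are drawn red and bold, and chain
+    /// (MVT::Other) edges blue and dashed.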
+    template<typename EdgeIter>
+    static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+      SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+      EVT VT = Op.getValueType();
+      if (VT == MVT::Flag)
+        return "color=red,style=bold";
+      else if (VT == MVT::Other)
+        return "color=blue,style=dashed";
+      return "";
+    }
+
+
+    static std::string getSimpleNodeLabel(const SDNode *Node,
+                                          const SelectionDAG *G) {
+      std::string Result = Node->getOperationName(G);
+      {
+        raw_string_ostream OS(Result);
+        Node->print_details(OS, G);
+      }
+      return Result;
+    }
+    std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+    static std::string getNodeAttributes(const SDNode *N,
+                                         const SelectionDAG *Graph) {
+#ifndef NDEBUG
+      const std::string &Attrs = Graph->getGraphAttrs(N);
+      if (!Attrs.empty()) {
+        if (Attrs.find("shape=") == std::string::npos)
+          return std::string("shape=Mrecord,") + Attrs;
+        else
+          return Attrs;
+      }
+#endif
+      return "shape=Mrecord";
+    }
+
+    static void addCustomGraphFeatures(SelectionDAG *G,
+                                       GraphWriter<SelectionDAG*> &GW) {
+      GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+      if (G->getRoot().getNode())
+        GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+                    "color=blue,style=dashed");
+    }
+  };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+                                                        const SelectionDAG *G) {
+  return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+            false, Title);
+#else
+  errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+         << "systems with Graphviz or gv!\n";
+#endif  // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+  viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+  NodeGraphAttrs.clear();
+#else
+  errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = Attrs;
+#else
+  errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+  std::map<const SDNode *, std::string>::const_iterator I =
+    NodeGraphAttrs.find(N);
+
+  if (I != NodeGraphAttrs.end())
+    return I->second;
+  else
+    return "";
+#else
+  errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+  return std::string("");
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+  errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor.  Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color,
+                                          DenseSet<SDNode *> &visited,
+                                          int level, bool &printed) {
+  bool hit_limit = false;
+
+#ifndef NDEBUG
+  if (level >= 20) {
+    if (!printed) {
+      printed = true;
+      DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+    }
+    return true;
+  }
+
+  unsigned oldSize = visited.size();
+  visited.insert(N);
+  if (visited.size() != oldSize) {
+    setGraphColor(N, Color);
+    for (SDNodeIterator i = SDNodeIterator::begin(N),
+                        iend = SDNodeIterator::end(N);
+         i != iend; ++i) {
+      hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit;
+    }
+  }
+#else
+  errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+  return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+  DenseSet<SDNode *> visited;
+  bool printed = false;
+  if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+    // Visually mark that we hit the limit
+    if (strcmp(Color, "red") == 0) {
+      setSubgraphColorHelper(N, "blue", visited, 0, printed);
+    } else if (strcmp(Color, "yellow") == 0) {
+      setSubgraphColorHelper(N, "green", visited, 0, printed);
+    }
+  }
+
+#else
+  errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+  std::string s;
+  raw_string_ostream O(s);
+  O << "SU(" << SU->NodeNum << "): ";
+  if (SU->getNode()) {
+    SmallVector<SDNode *, 4> FlaggedNodes;
+    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
+      FlaggedNodes.push_back(N);
+    while (!FlaggedNodes.empty()) {
+      O << DOTGraphTraits<SelectionDAG*>
+              ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
+      FlaggedNodes.pop_back();
+      if (!FlaggedNodes.empty())
+        O << "\n    ";
+    }
+  } else {
+    O << "CROSS RC COPY";
+  }
+  return O.str();
+}
+
+void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+  if (DAG) {
+    // Draw a special "GraphRoot" node to indicate the root of the graph.
+    GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+    const SDNode *N = DAG->getRoot().getNode();
+    if (N && N->getNodeId() != -1)
+      GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+                  "color=blue,style=dashed");
+  }
+}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 0000000..d74ec7e
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,2675 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+namespace llvm {
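+/// getTLSModel - Choose a TLS access model for GV: local or general dynamic
+/// under PIC, local or initial exec otherwise, depending on whether the
+/// definition is known to be local to this module.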
+TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
+  bool isLocal = GV->hasLocalLinkage();
+  bool isDeclaration = GV->isDeclaration();
+  // FIXME: what should we do for protected and internal visibility?
+  // For variables, is internal different from hidden?
+  bool isHidden = GV->hasHiddenVisibility();
+
+  if (reloc == Reloc::PIC_) {
+    if (isLocal || isHidden)
+      return TLSModel::LocalDynamic;
+    else
+      return TLSModel::GeneralDynamic;
+  } else {
+    if (!isDeclaration || isHidden)
+      return TLSModel::LocalExec;
+    else
+      return TLSModel::InitialExec;
+  }
+}
+}
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
+  Names[RTLIB::SHL_I16] = "__ashlhi3";
+  Names[RTLIB::SHL_I32] = "__ashlsi3";
+  Names[RTLIB::SHL_I64] = "__ashldi3";
+  Names[RTLIB::SHL_I128] = "__ashlti3";
+  Names[RTLIB::SRL_I16] = "__lshrhi3";
+  Names[RTLIB::SRL_I32] = "__lshrsi3";
+  Names[RTLIB::SRL_I64] = "__lshrdi3";
+  Names[RTLIB::SRL_I128] = "__lshrti3";
+  Names[RTLIB::SRA_I16] = "__ashrhi3";
+  Names[RTLIB::SRA_I32] = "__ashrsi3";
+  Names[RTLIB::SRA_I64] = "__ashrdi3";
+  Names[RTLIB::SRA_I128] = "__ashrti3";
+  Names[RTLIB::MUL_I8] = "__mulqi3";
+  Names[RTLIB::MUL_I16] = "__mulhi3";
+  Names[RTLIB::MUL_I32] = "__mulsi3";
+  Names[RTLIB::MUL_I64] = "__muldi3";
+  Names[RTLIB::MUL_I128] = "__multi3";
+  Names[RTLIB::SDIV_I8] = "__divqi3";
+  Names[RTLIB::SDIV_I16] = "__divhi3";
+  Names[RTLIB::SDIV_I32] = "__divsi3";
+  Names[RTLIB::SDIV_I64] = "__divdi3";
+  Names[RTLIB::SDIV_I128] = "__divti3";
+  Names[RTLIB::UDIV_I8] = "__udivqi3";
+  Names[RTLIB::UDIV_I16] = "__udivhi3";
+  Names[RTLIB::UDIV_I32] = "__udivsi3";
+  Names[RTLIB::UDIV_I64] = "__udivdi3";
+  Names[RTLIB::UDIV_I128] = "__udivti3";
+  Names[RTLIB::SREM_I8] = "__modqi3";
+  Names[RTLIB::SREM_I16] = "__modhi3";
+  Names[RTLIB::SREM_I32] = "__modsi3";
+  Names[RTLIB::SREM_I64] = "__moddi3";
+  Names[RTLIB::SREM_I128] = "__modti3";
+  Names[RTLIB::UREM_I8] = "__umodqi3";
+  Names[RTLIB::UREM_I16] = "__umodhi3";
+  Names[RTLIB::UREM_I32] = "__umodsi3";
+  Names[RTLIB::UREM_I64] = "__umoddi3";
+  Names[RTLIB::UREM_I128] = "__umodti3";
+  Names[RTLIB::NEG_I32] = "__negsi2";
+  Names[RTLIB::NEG_I64] = "__negdi2";
+  Names[RTLIB::ADD_F32] = "__addsf3";
+  Names[RTLIB::ADD_F64] = "__adddf3";
+  Names[RTLIB::ADD_F80] = "__addxf3";
+  Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+  Names[RTLIB::SUB_F32] = "__subsf3";
+  Names[RTLIB::SUB_F64] = "__subdf3";
+  Names[RTLIB::SUB_F80] = "__subxf3";
+  Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+  Names[RTLIB::MUL_F32] = "__mulsf3";
+  Names[RTLIB::MUL_F64] = "__muldf3";
+  Names[RTLIB::MUL_F80] = "__mulxf3";
+  Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+  Names[RTLIB::DIV_F32] = "__divsf3";
+  Names[RTLIB::DIV_F64] = "__divdf3";
+  Names[RTLIB::DIV_F80] = "__divxf3";
+  Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+  Names[RTLIB::REM_F32] = "fmodf";
+  Names[RTLIB::REM_F64] = "fmod";
+  Names[RTLIB::REM_F80] = "fmodl";
+  Names[RTLIB::REM_PPCF128] = "fmodl";
+  Names[RTLIB::POWI_F32] = "__powisf2";
+  Names[RTLIB::POWI_F64] = "__powidf2";
+  Names[RTLIB::POWI_F80] = "__powixf2";
+  Names[RTLIB::POWI_PPCF128] = "__powitf2";
+  Names[RTLIB::SQRT_F32] = "sqrtf";
+  Names[RTLIB::SQRT_F64] = "sqrt";
+  Names[RTLIB::SQRT_F80] = "sqrtl";
+  Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+  Names[RTLIB::LOG_F32] = "logf";
+  Names[RTLIB::LOG_F64] = "log";
+  Names[RTLIB::LOG_F80] = "logl";
+  Names[RTLIB::LOG_PPCF128] = "logl";
+  Names[RTLIB::LOG2_F32] = "log2f";
+  Names[RTLIB::LOG2_F64] = "log2";
+  Names[RTLIB::LOG2_F80] = "log2l";
+  Names[RTLIB::LOG2_PPCF128] = "log2l";
+  Names[RTLIB::LOG10_F32] = "log10f";
+  Names[RTLIB::LOG10_F64] = "log10";
+  Names[RTLIB::LOG10_F80] = "log10l";
+  Names[RTLIB::LOG10_PPCF128] = "log10l";
+  Names[RTLIB::EXP_F32] = "expf";
+  Names[RTLIB::EXP_F64] = "exp";
+  Names[RTLIB::EXP_F80] = "expl";
+  Names[RTLIB::EXP_PPCF128] = "expl";
+  Names[RTLIB::EXP2_F32] = "exp2f";
+  Names[RTLIB::EXP2_F64] = "exp2";
+  Names[RTLIB::EXP2_F80] = "exp2l";
+  Names[RTLIB::EXP2_PPCF128] = "exp2l";
+  Names[RTLIB::SIN_F32] = "sinf";
+  Names[RTLIB::SIN_F64] = "sin";
+  Names[RTLIB::SIN_F80] = "sinl";
+  Names[RTLIB::SIN_PPCF128] = "sinl";
+  Names[RTLIB::COS_F32] = "cosf";
+  Names[RTLIB::COS_F64] = "cos";
+  Names[RTLIB::COS_F80] = "cosl";
+  Names[RTLIB::COS_PPCF128] = "cosl";
+  Names[RTLIB::POW_F32] = "powf";
+  Names[RTLIB::POW_F64] = "pow";
+  Names[RTLIB::POW_F80] = "powl";
+  Names[RTLIB::POW_PPCF128] = "powl";
+  Names[RTLIB::CEIL_F32] = "ceilf";
+  Names[RTLIB::CEIL_F64] = "ceil";
+  Names[RTLIB::CEIL_F80] = "ceill";
+  Names[RTLIB::CEIL_PPCF128] = "ceill";
+  Names[RTLIB::TRUNC_F32] = "truncf";
+  Names[RTLIB::TRUNC_F64] = "trunc";
+  Names[RTLIB::TRUNC_F80] = "truncl";
+  Names[RTLIB::TRUNC_PPCF128] = "truncl";
+  Names[RTLIB::RINT_F32] = "rintf";
+  Names[RTLIB::RINT_F64] = "rint";
+  Names[RTLIB::RINT_F80] = "rintl";
+  Names[RTLIB::RINT_PPCF128] = "rintl";
+  Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+  Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+  Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+  Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+  Names[RTLIB::FLOOR_F32] = "floorf";
+  Names[RTLIB::FLOOR_F64] = "floor";
+  Names[RTLIB::FLOOR_F80] = "floorl";
+  Names[RTLIB::FLOOR_PPCF128] = "floorl";
+  Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+  Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+  Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+  Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+  Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+  Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+  Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8";
+  Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16";
+  Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+  Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+  Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+  Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+  Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+  Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+  Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+  Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+  Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+  Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+  Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+  Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+  Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8";
+  Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16";
+  Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+  Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+  Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+  Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+  Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+  Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+  Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+  Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+  Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+  Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+  Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+  Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+  Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+  Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+  Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+  Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+  Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+  Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+  Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+  Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+  Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+  Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+  Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+  Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+  Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+  Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+  Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+  Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+  Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+  Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+  Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+  Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+  Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+  Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+  Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+  Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+  Names[RTLIB::OEQ_F32] = "__eqsf2";
+  Names[RTLIB::OEQ_F64] = "__eqdf2";
+  Names[RTLIB::UNE_F32] = "__nesf2";
+  Names[RTLIB::UNE_F64] = "__nedf2";
+  Names[RTLIB::OGE_F32] = "__gesf2";
+  Names[RTLIB::OGE_F64] = "__gedf2";
+  Names[RTLIB::OLT_F32] = "__ltsf2";
+  Names[RTLIB::OLT_F64] = "__ltdf2";
+  Names[RTLIB::OLE_F32] = "__lesf2";
+  Names[RTLIB::OLE_F64] = "__ledf2";
+  Names[RTLIB::OGT_F32] = "__gtsf2";
+  Names[RTLIB::OGT_F64] = "__gtdf2";
+  Names[RTLIB::UO_F32] = "__unordsf2";
+  Names[RTLIB::UO_F64] = "__unorddf2";
+  Names[RTLIB::O_F32] = "__unordsf2";
+  Names[RTLIB::O_F64] = "__unorddf2";
+  Names[RTLIB::MEMCPY] = "memcpy";
+  Names[RTLIB::MEMMOVE] = "memmove";
+  Names[RTLIB::MEMSET] = "memset";
+  Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+}
+
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+    CCs[i] = CallingConv::C;
+  }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+  if (OpVT == MVT::f32) {
+    if (RetVT == MVT::f64)
+      return FPEXT_F32_F64;
+  }
+  return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+  if (RetVT == MVT::f32) {
+    if (OpVT == MVT::f64)
+      return FPROUND_F64_F32;
+    if (OpVT == MVT::f80)
+      return FPROUND_F80_F32;
+    if (OpVT == MVT::ppcf128)
+      return FPROUND_PPCF128_F32;
+  } else if (RetVT == MVT::f64) {
+    if (OpVT == MVT::f80)
+      return FPROUND_F80_F64;
+    if (OpVT == MVT::ppcf128)
+      return FPROUND_PPCF128_F64;
+  }
+  return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+  if (OpVT == MVT::f32) {
+    if (RetVT == MVT::i8)
+      return FPTOSINT_F32_I8;
+    if (RetVT == MVT::i16)
+      return FPTOSINT_F32_I16;
+    if (RetVT == MVT::i32)
+      return FPTOSINT_F32_I32;
+    if (RetVT == MVT::i64)
+      return FPTOSINT_F32_I64;
+    if (RetVT == MVT::i128)
+      return FPTOSINT_F32_I128;
+  } else if (OpVT == MVT::f64) {
+    if (RetVT == MVT::i32)
+      return FPTOSINT_F64_I32;
+    if (RetVT == MVT::i64)
+      return FPTOSINT_F64_I64;
+    if (RetVT == MVT::i128)
+      return FPTOSINT_F64_I128;
+  } else if (OpVT == MVT::f80) {
+    if (RetVT == MVT::i32)
+      return FPTOSINT_F80_I32;
+    if (RetVT == MVT::i64)
+      return FPTOSINT_F80_I64;
+    if (RetVT == MVT::i128)
+      return FPTOSINT_F80_I128;
+  } else if (OpVT == MVT::ppcf128) {
+    if (RetVT == MVT::i32)
+      return FPTOSINT_PPCF128_I32;
+    if (RetVT == MVT::i64)
+      return FPTOSINT_PPCF128_I64;
+    if (RetVT == MVT::i128)
+      return FPTOSINT_PPCF128_I128;
+  }
+  return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+  if (OpVT == MVT::f32) {
+    if (RetVT == MVT::i8)
+      return FPTOUINT_F32_I8;
+    if (RetVT == MVT::i16)
+      return FPTOUINT_F32_I16;
+    if (RetVT == MVT::i32)
+      return FPTOUINT_F32_I32;
+    if (RetVT == MVT::i64)
+      return FPTOUINT_F32_I64;
+    if (RetVT == MVT::i128)
+      return FPTOUINT_F32_I128;
+  } else if (OpVT == MVT::f64) {
+    if (RetVT == MVT::i32)
+      return FPTOUINT_F64_I32;
+    if (RetVT == MVT::i64)
+      return FPTOUINT_F64_I64;
+    if (RetVT == MVT::i128)
+      return FPTOUINT_F64_I128;
+  } else if (OpVT == MVT::f80) {
+    if (RetVT == MVT::i32)
+      return FPTOUINT_F80_I32;
+    if (RetVT == MVT::i64)
+      return FPTOUINT_F80_I64;
+    if (RetVT == MVT::i128)
+      return FPTOUINT_F80_I128;
+  } else if (OpVT == MVT::ppcf128) {
+    if (RetVT == MVT::i32)
+      return FPTOUINT_PPCF128_I32;
+    if (RetVT == MVT::i64)
+      return FPTOUINT_PPCF128_I64;
+    if (RetVT == MVT::i128)
+      return FPTOUINT_PPCF128_I128;
+  }
+  return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+  if (OpVT == MVT::i32) {
+    if (RetVT == MVT::f32)
+      return SINTTOFP_I32_F32;
+    else if (RetVT == MVT::f64)
+      return SINTTOFP_I32_F64;
+    else if (RetVT == MVT::f80)
+      return SINTTOFP_I32_F80;
+    else if (RetVT == MVT::ppcf128)
+      return SINTTOFP_I32_PPCF128;
+  } else if (OpVT == MVT::i64) {
+    if (RetVT == MVT::f32)
+      return SINTTOFP_I64_F32;
+    else if (RetVT == MVT::f64)
+      return SINTTOFP_I64_F64;
+    else if (RetVT == MVT::f80)
+      return SINTTOFP_I64_F80;
+    else if (RetVT == MVT::ppcf128)
+      return SINTTOFP_I64_PPCF128;
+  } else if (OpVT == MVT::i128) {
+    if (RetVT == MVT::f32)
+      return SINTTOFP_I128_F32;
+    else if (RetVT == MVT::f64)
+      return SINTTOFP_I128_F64;
+    else if (RetVT == MVT::f80)
+      return SINTTOFP_I128_F80;
+    else if (RetVT == MVT::ppcf128)
+      return SINTTOFP_I128_PPCF128;
+  }
+  return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+  if (OpVT == MVT::i32) {
+    if (RetVT == MVT::f32)
+      return UINTTOFP_I32_F32;
+    else if (RetVT == MVT::f64)
+      return UINTTOFP_I32_F64;
+    else if (RetVT == MVT::f80)
+      return UINTTOFP_I32_F80;
+    else if (RetVT == MVT::ppcf128)
+      return UINTTOFP_I32_PPCF128;
+  } else if (OpVT == MVT::i64) {
+    if (RetVT == MVT::f32)
+      return UINTTOFP_I64_F32;
+    else if (RetVT == MVT::f64)
+      return UINTTOFP_I64_F64;
+    else if (RetVT == MVT::f80)
+      return UINTTOFP_I64_F80;
+    else if (RetVT == MVT::ppcf128)
+      return UINTTOFP_I64_PPCF128;
+  } else if (OpVT == MVT::i128) {
+    if (RetVT == MVT::f32)
+      return UINTTOFP_I128_F32;
+    else if (RetVT == MVT::f64)
+      return UINTTOFP_I128_F64;
+    else if (RetVT == MVT::f80)
+      return UINTTOFP_I128_F80;
+    else if (RetVT == MVT::ppcf128)
+      return UINTTOFP_I128_PPCF128;
+  }
+  return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+  memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
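+  // Note that memset stores only its low byte into each byte of the array,
+  // so unassigned entries do not literally hold SETCC_INVALID; in practice
+  // only the comparison libcalls assigned below are ever queried.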
+  CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+  CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+  CCs[RTLIB::UNE_F32] = ISD::SETNE;
+  CCs[RTLIB::UNE_F64] = ISD::SETNE;
+  CCs[RTLIB::OGE_F32] = ISD::SETGE;
+  CCs[RTLIB::OGE_F64] = ISD::SETGE;
+  CCs[RTLIB::OLT_F32] = ISD::SETLT;
+  CCs[RTLIB::OLT_F64] = ISD::SETLT;
+  CCs[RTLIB::OLE_F32] = ISD::SETLE;
+  CCs[RTLIB::OLE_F64] = ISD::SETLE;
+  CCs[RTLIB::OGT_F32] = ISD::SETGT;
+  CCs[RTLIB::OGT_F64] = ISD::SETGT;
+  CCs[RTLIB::UO_F32] = ISD::SETNE;
+  CCs[RTLIB::UO_F64] = ISD::SETNE;
+  CCs[RTLIB::O_F32] = ISD::SETEQ;
+  CCs[RTLIB::O_F64] = ISD::SETEQ;
+}
+
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
+  : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
+  // All operations default to being supported.
+  memset(OpActions, 0, sizeof(OpActions));
+  memset(LoadExtActions, 0, sizeof(LoadExtActions));
+  memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+  memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+  memset(ConvertActions, 0, sizeof(ConvertActions));
+  memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+  // Set default actions for various operations.
+  for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+    // Default all indexed load / store to expand.
+    for (unsigned IM = (unsigned)ISD::PRE_INC;
+         IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+      setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+      setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+    }
+    
+    // These operations default to expand.
+    setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+  }
+
+  // Most targets ignore the @llvm.prefetch intrinsic.
+  setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+  
+  // ConstantFP nodes default to expand.  Targets can either change this to 
+  // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+  // to optimize expansions for certain constants.
+  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+
+  // These library functions default to expand.
+  setOperationAction(ISD::FLOG , MVT::f64, Expand);
+  setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+  setOperationAction(ISD::FLOG10,MVT::f64, Expand);
+  setOperationAction(ISD::FEXP , MVT::f64, Expand);
+  setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+  setOperationAction(ISD::FLOG , MVT::f32, Expand);
+  setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+  setOperationAction(ISD::FLOG10,MVT::f32, Expand);
+  setOperationAction(ISD::FEXP , MVT::f32, Expand);
+  setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+
+  // Default ISD::TRAP to expand (which turns it into abort).
+  setOperationAction(ISD::TRAP, MVT::Other, Expand);
+    
+  IsLittleEndian = TD->isLittleEndian();
+  ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
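+  // On a target with 4-byte pointers this makes ShiftAmountTy and PointerTy
+  // both MVT::i32.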
+  memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+  memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+  maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+  benefitFromCodePlacementOpt = false;
+  UseUnderscoreSetJmp = false;
+  UseUnderscoreLongJmp = false;
+  SelectIsExpensive = false;
+  IntDivIsCheap = false;
+  Pow2DivIsCheap = false;
+  StackPointerRegisterToSaveRestore = 0;
+  ExceptionPointerRegister = 0;
+  ExceptionSelectorRegister = 0;
+  BooleanContents = UndefinedBooleanContent;
+  SchedPreferenceInfo = SchedulingForLatency;
+  JumpBufSize = 0;
+  JumpBufAlignment = 0;
+  IfCvtBlockSizeLimit = 2;
+  IfCvtDupBlockSizeLimit = 0;
+  PrefLoopAlignment = 0;
+
+  InitLibcallNames(LibcallRoutineNames);
+  InitCmpLibcallCCs(CmpLibcallCCs);
+  InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLowering::~TargetLowering() {
+  delete &TLOF;
+}
+
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+                                          unsigned &NumIntermediates,
+                                          EVT &RegisterVT,
+                                          TargetLowering *TLI) {
+  // Figure out the right, legal destination reg to copy into.
+  unsigned NumElts = VT.getVectorNumElements();
+  MVT EltTy = VT.getVectorElementType();
+  
+  unsigned NumVectorRegs = 1;
+  
+  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we 
+  // could break down into LHS/RHS like LegalizeDAG does.
+  if (!isPowerOf2_32(NumElts)) {
+    NumVectorRegs = NumElts;
+    NumElts = 1;
+  }
+  
+  // Divide the input until we get to a supported size.  This will always
+  // end with a scalar if the target doesn't support vectors.
+  while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+    NumElts >>= 1;
+    NumVectorRegs <<= 1;
+  }
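+  // For example, v8f32 on a target whose widest legal vector type is v4f32
+  // halves once, leaving NumElts == 4 and NumVectorRegs == 2.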
+
+  NumIntermediates = NumVectorRegs;
+  
+  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+  if (!TLI->isTypeLegal(NewVT))
+    NewVT = EltTy;
+  IntermediateVT = NewVT;
+
+  EVT DestVT = TLI->getRegisterType(NewVT);
+  RegisterVT = DestVT;
+  if (EVT(DestVT).bitsLT(NewVT)) {
+    // Value is expanded, e.g. i64 -> i16.
+    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+  } else {
+    // Otherwise, promotion or legal types use the same number of registers as
+    // the vector decimated to the appropriate level.
+    return NumVectorRegs;
+  }
+}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLowering::computeRegisterProperties() {
+  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+         "Too many value types for ValueTypeActions to hold!");
+
+  // Everything defaults to needing one register.
+  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+    NumRegistersForVT[i] = 1;
+    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+  }
+  // ...except isVoid, which doesn't need any registers.
+  NumRegistersForVT[MVT::isVoid] = 0;
+
+  // Find the largest integer register class.
+  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+  // Every integer value type larger than this largest register takes twice as
+  // many registers to represent as the previous ValueType.
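+  // For example, if i32 is the largest legal integer type, i64 is expanded
+  // to two i32 registers and i128 to four.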
+  for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
+    EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
+    if (!ExpandedVT.isInteger())
+      break;
+    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+    RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+    TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+    ValueTypeActions.setTypeAction(ExpandedVT, Expand);
+  }
+
+  // Inspect all of the ValueType's smaller than the largest integer
+  // register to see which ones need promotion.
+  unsigned LegalIntReg = LargestIntReg;
+  for (unsigned IntReg = LargestIntReg - 1;
+       IntReg >= (unsigned)MVT::i1; --IntReg) {
+    EVT IVT = (MVT::SimpleValueType)IntReg;
+    if (isTypeLegal(IVT)) {
+      LegalIntReg = IntReg;
+    } else {
+      RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+        (MVT::SimpleValueType)LegalIntReg;
+      ValueTypeActions.setTypeAction(IVT, Promote);
+    }
+  }
+
+  // ppcf128 type is really two f64's.
+  if (!isTypeLegal(MVT::ppcf128)) {
+    NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+    RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+    TransformToType[MVT::ppcf128] = MVT::f64;
+    ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
+  }    
+
+  // Decide how to handle f64. If the target does not have native f64 support,
+  // expand it to i64 and we will be generating soft float library calls.
+  if (!isTypeLegal(MVT::f64)) {
+    NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+    RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+    TransformToType[MVT::f64] = MVT::i64;
+    ValueTypeActions.setTypeAction(MVT::f64, Expand);
+  }
+
+  // Decide how to handle f32. If the target does not have native support for
+  // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+  if (!isTypeLegal(MVT::f32)) {
+    if (isTypeLegal(MVT::f64)) {
+      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+      TransformToType[MVT::f32] = MVT::f64;
+      ValueTypeActions.setTypeAction(MVT::f32, Promote);
+    } else {
+      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+      TransformToType[MVT::f32] = MVT::i32;
+      ValueTypeActions.setTypeAction(MVT::f32, Expand);
+    }
+  }
+  
+  // Loop over all of the vector value types to see which need transformations.
+  for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+    MVT VT = (MVT::SimpleValueType)i;
+    if (!isTypeLegal(VT)) {
+      MVT IntermediateVT;
+      EVT RegisterVT;
+      unsigned NumIntermediates;
+      NumRegistersForVT[i] =
+        getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+                                  RegisterVT, this);
+      RegisterTypeForVT[i] = RegisterVT;
+      
+      // Determine if there is a legal wider type.
+      bool IsLegalWiderType = false;
+      EVT EltVT = VT.getVectorElementType();
+      unsigned NElts = VT.getVectorNumElements();
+      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        EVT SVT = (MVT::SimpleValueType)nVT;
+        if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
+            SVT.getVectorNumElements() > NElts && NElts != 1) {
+          TransformToType[i] = SVT;
+          ValueTypeActions.setTypeAction(VT, Promote);
+          IsLegalWiderType = true;
+          break;
+        }
+      }
+      if (!IsLegalWiderType) {
+        EVT NVT = VT.getPow2VectorType();
+        if (NVT == VT) {
+          // Type is already a power of 2.  The default action is to split.
+          TransformToType[i] = MVT::Other;
+          ValueTypeActions.setTypeAction(VT, Expand);
+        } else {
+          TransformToType[i] = NVT;
+          ValueTypeActions.setTypeAction(VT, Promote);
+        }
+      }
+    }
+  }
+}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+  return NULL;
+}
+
+
+MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
+  return PointerTy.SimpleTy;
+}
+
+MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
+  return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register.  It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+                                                EVT &IntermediateVT,
+                                                unsigned &NumIntermediates,
+                                                EVT &RegisterVT) const {
+  // Figure out the right, legal destination reg to copy into.
+  unsigned NumElts = VT.getVectorNumElements();
+  EVT EltTy = VT.getVectorElementType();
+  
+  unsigned NumVectorRegs = 1;
+  
+  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we 
+  // could break down into LHS/RHS like LegalizeDAG does.
+  if (!isPowerOf2_32(NumElts)) {
+    NumVectorRegs = NumElts;
+    NumElts = 1;
+  }
+  
+  // Divide the input until we get to a supported size.  This will always
+  // end with a scalar if the target doesn't support vectors.
+  while (NumElts > 1 && !isTypeLegal(
+                                   EVT::getVectorVT(Context, EltTy, NumElts))) {
+    NumElts >>= 1;
+    NumVectorRegs <<= 1;
+  }
+
+  NumIntermediates = NumVectorRegs;
+  
+  EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+  if (!isTypeLegal(NewVT))
+    NewVT = EltTy;
+  IntermediateVT = NewVT;
+
+  EVT DestVT = getRegisterType(Context, NewVT);
+  RegisterVT = DestVT;
+  if (DestVT.bitsLT(NewVT)) {
+    // Value is expanded, e.g. i64 -> i16.
+    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+  } else {
+    // Otherwise, promotion or legal types use the same number of registers as
+    // the vector decimated to the appropriate level.
+    return NumVectorRegs;
+  }
+}
+
+/// getWidenVectorType: given a vector type, returns the type to widen to
+/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other.
+/// When and where to widen is target dependent based on the cost of
+/// scalarizing vs using the wider vector type.
+EVT TargetLowering::getWidenVectorType(EVT VT) const {
+  assert(VT.isVector());
+  if (isTypeLegal(VT))
+    return VT;
+ 
+  // Default is not to widen until widening is moved to LegalizeTypes.
+  return MVT::Other;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.  This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+  return TD->getCallFrameTypeAlignment(Ty);
+}
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function.  The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+  // In non-pic modes, just use the address of a block.
+  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+    return MachineJumpTableInfo::EK_BlockAddress;
+  
+  // In PIC mode, if the target supports a GPRel32 directive, use it.
+  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
+    return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+  
+  // Otherwise, use a label difference.
+  return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+                                                 SelectionDAG &DAG) const {
+  // If our PIC model is GP relative, use the global offset table as the base.
+  if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
+    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+  return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+                                             unsigned JTI,
+                                             MCContext &Ctx) const {
+  // The normal PIC reloc base is the label at the start of the jump table.
+  return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // Assume that everything is safe in static mode.
+  if (getTargetMachine().getRelocationModel() == Reloc::Static)
+    return true;
+
+  // In dynamic-no-pic mode, assume that known defined values are safe.
+  if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+      GA &&
+      !GA->getGlobal()->isDeclaration() &&
+      !GA->getGlobal()->isWeakForLinker())
+    return true;
+
+  // Otherwise assume nothing is safe.
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+//  Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the 
+/// specified instruction is a constant integer.  If so, check to see if there
+/// are any bits set in the constant that are not demanded.  If so, shrink the
+/// constant and return true.
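+///
+/// For example, if only the low eight bits of (and X, 0x1FF) are demanded,
+/// the constant can be shrunk to 0xFF.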
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, 
+                                                        const APInt &Demanded) {
+  DebugLoc dl = Op.getDebugLoc();
+
+  // FIXME: ISD::SELECT, ISD::SELECT_CC
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::XOR:
+  case ISD::AND:
+  case ISD::OR: {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    if (!C) return false;
+
+    if (Op.getOpcode() == ISD::XOR &&
+        (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+      return false;
+
+    // If the constant has bits set that are not demanded, clear them and
+    // rebuild the node with the shrunk constant.
+    if (C->getAPIntValue().intersects(~Demanded)) {
+      EVT VT = Op.getValueType();
+      SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+                                DAG.getConstant(Demanded &
+                                                C->getAPIntValue(), 
+                                                VT));
+      return CombineTo(Op, New);
+    }
+
+    break;
+  }
+  }
+
+  return false;
+}
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free.  This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
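+///
+/// For example, if only the low eight bits of an i32 add are demanded and
+/// i8 <-> i32 casts are free on the target, the add can be performed in i8
+/// and the result zero-extended back to i32.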
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+                                                    unsigned BitWidth,
+                                                    const APInt &Demanded,
+                                                    DebugLoc dl) {
+  assert(Op.getNumOperands() == 2 &&
+         "ShrinkDemandedOp only supports binary operators!");
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "ShrinkDemandedOp only supports nodes with one result!");
+
+  // Don't do this if the node has another user, which may require the
+  // full value.
+  if (!Op.getNode()->hasOneUse())
+    return false;
+
+  // Search for the smallest integer type with free casts to and from
+  // Op's type. For expedience, just check power-of-2 integer types.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+  if (!isPowerOf2_32(SmallVTBits))
+    SmallVTBits = NextPowerOf2(SmallVTBits);
+  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+        TLI.isZExtFree(SmallVT, Op.getValueType())) {
+      // We found a type with free casts.
+      SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+                                          Op.getNode()->getOperand(0)),
+                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+                                          Op.getNode()->getOperand(1)));
+      SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+      return CombineTo(Op, Z);
+    }
+  }
+  return false;
+}
+
+/// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream.  If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, recording the original and new nodes in TLO. Otherwise,
+/// analyze the expression and return a mask of KnownOne and KnownZero bits for
+/// the expression (used to simplify the caller).  The KnownZero/One bits may
+/// only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+                                          const APInt &DemandedMask,
+                                          APInt &KnownZero,
+                                          APInt &KnownOne,
+                                          TargetLoweringOpt &TLO,
+                                          unsigned Depth) const {
+  unsigned BitWidth = DemandedMask.getBitWidth();
+  assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+         "Mask size mismatches value type size!");
+  APInt NewMask = DemandedMask;
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Don't know anything.
+  KnownZero = KnownOne = APInt(BitWidth, 0);
+
+  // Other users may use these bits.
+  if (!Op.getNode()->hasOneUse()) { 
+    if (Depth != 0) {
+      // If not at the root, just compute the KnownZero/KnownOne bits to
+      // simplify things downstream.
+      TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+      return false;
+    }
+    // If this is the root being simplified, allow it to have multiple uses,
+    // just set the NewMask to all bits.
+    NewMask = APInt::getAllOnesValue(BitWidth);
+  } else if (DemandedMask == 0) {   
+    // Not demanding any bits from Op.
+    if (Op.getOpcode() != ISD::UNDEF)
+      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+    return false;
+  } else if (Depth == 6) {        // Limit search depth.
+    return false;
+  }
+
+  APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
+    KnownZero = ~KnownOne & NewMask;
+    return false;   // Don't fall through, will infinitely loop.
+  case ISD::AND:
+    // If the RHS is a constant, check to see if the LHS would be zero without
+    // using the bits from the RHS.  Below, we use knowledge about the RHS to
+    // simplify the LHS, here we're using information from the LHS to simplify
+    // the RHS.
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      APInt LHSZero, LHSOne;
+      TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
+                                LHSZero, LHSOne, Depth+1);
+      // If the LHS already has zeros where RHSC does, this and is dead.
+      if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+        return TLO.CombineTo(Op, Op.getOperand(0));
+      // If any of the set bits in the RHS are known zero on the LHS, shrink
+      // the constant.
+      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+        return true;
+    }
+    
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+                             KnownZero2, KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+      
+    // If all of the demanded bits are known one on one side, return the other.
+    // These bits cannot contribute to the result of the 'and'.
+    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If all of the demanded bits in the inputs are known zeros, return zero.
+    if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+    // If the RHS is a constant, see if we can simplify it.
+    if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+      return true;
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 are known to be clear if zero in either the LHS | RHS.
+    KnownZero |= KnownZero2;
+    break;
+  case ISD::OR:
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
+                             KnownZero2, KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If all of the demanded bits are known zero on one side, return the other.
+    // These bits cannot contribute to the result of the 'or'.
+    if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If all of the potentially set bits on one side are known to be set on
+    // the other side, just use the 'other' side.
+    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If the RHS is a constant, see if we can simplify it.
+    if (TLO.ShrinkDemandedConstant(Op, NewMask))
+      return true;
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 are known to be set if set in either the LHS | RHS.
+    KnownOne |= KnownOne2;
+    break;
+  case ISD::XOR:
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If all of the demanded bits are known zero on one side, return the other.
+    // These bits cannot contribute to the result of the 'xor'.
+    if ((KnownZero & NewMask) == NewMask)
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((KnownZero2 & NewMask) == NewMask)
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
+    // If all of the unknown bits are known to be zero on one side or the other
+    // (but not both) turn this into an *inclusive* or.
+    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+    if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+                                               Op.getOperand(0),
+                                               Op.getOperand(1)));
+    
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 are known to be set if set in only one of the LHS, RHS.
+    KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    
+    // If all of the demanded bits on one side are known, and all of the set
+    // bits on that side are also known to be set on the other side, turn this
+    // into an AND, as we know the bits will be cleared.
+    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+    if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
+      if ((KnownOne & KnownOne2) == KnownOne) {
+        EVT VT = Op.getValueType();
+        SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, 
+                                                 Op.getOperand(0), ANDC));
+      }
+    }
+    
+    // If the RHS is a constant, see if we can simplify it.
+    // For XOR, we prefer to force bits to 1 if they will make a -1.
+    // If we can't force bits, try to shrink the constant.
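+    // For example, if only the low nibble is demanded, (xor X, 0xF) on an
+    // i8 value can be widened to (xor X, 0xFF), i.e. a full 'not'.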
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      APInt Expanded = C->getAPIntValue() | (~NewMask);
+      // If we can expand it to have all bits set, do it.
+      if (Expanded.isAllOnesValue()) {
+        if (Expanded != C->getAPIntValue()) {
+          EVT VT = Op.getValueType();
+          SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
+                                          TLO.DAG.getConstant(Expanded, VT));
+          return TLO.CombineTo(Op, New);
+        }
+        // If it already has all the bits set, there is nothing to change,
+        // but don't shrink the constant either!
+      } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+        return true;
+      }
+    }
+
+    KnownZero = KnownZeroOut;
+    KnownOne  = KnownOneOut;
+    break;
+  case ISD::SELECT:
+    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If the operands are constants, see if we can simplify them.
+    if (TLO.ShrinkDemandedConstant(Op, NewMask))
+      return true;
+    
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  case ISD::SELECT_CC:
+    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If the operands are constants, see if we can simplify them.
+    if (TLO.ShrinkDemandedConstant(Op, NewMask))
+      return true;
+      
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  case ISD::SHL:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+      SDValue InOp = Op.getOperand(0);
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        break;
+
+      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+      // single shift.  We can do this if the bottom bits (which are shifted
+      // out) are never demanded.
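+      // For example, with i32 values, (shl (srl X, 8), 4) under a mask that
+      // ignores the low 4 bits becomes (srl X, 4).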
+      if (InOp.getOpcode() == ISD::SRL &&
+          isa<ConstantSDNode>(InOp.getOperand(1))) {
+        if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+          unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+          unsigned Opc = ISD::SHL;
+          int Diff = ShAmt-C1;
+          if (Diff < 0) {
+            Diff = -Diff;
+            Opc = ISD::SRL;
+          }          
+          
+          SDValue NewSA = 
+            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+          EVT VT = Op.getValueType();
+          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+                                                   InOp.getOperand(0), NewSA));
+        }
+      }      
+      
+      if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt),
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      KnownZero <<= ShAmt;
+      KnownOne  <<= ShAmt;
+      // Low bits are known zero.
+      KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+    }
+    break;
+  case ISD::SRL:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      EVT VT = Op.getValueType();
+      unsigned ShAmt = SA->getZExtValue();
+      unsigned VTSize = VT.getSizeInBits();
+      SDValue InOp = Op.getOperand(0);
+      
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        break;
+
+      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+      // single shift.  We can do this if the top bits (which are shifted out)
+      // are never demanded.
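+      // For example, with i32 values, (srl (shl X, 8), 4) under a mask that
+      // ignores the top 4 bits becomes (shl X, 4).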
+      if (InOp.getOpcode() == ISD::SHL &&
+          isa<ConstantSDNode>(InOp.getOperand(1))) {
+        if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+          unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+          unsigned Opc = ISD::SRL;
+          int Diff = ShAmt-C1;
+          if (Diff < 0) {
+            Diff = -Diff;
+            Opc = ISD::SHL;
+          }          
+          
+          SDValue NewSA =
+            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+                                                   InOp.getOperand(0), NewSA));
+        }
+      }      
+      
+      // Compute the new bits that are at the top now.
+      if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero = KnownZero.lshr(ShAmt);
+      KnownOne  = KnownOne.lshr(ShAmt);
+
+      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    break;
+  case ISD::SRA:
+    // If this is an arithmetic shift right and only the low-bit is set, we can
+    // always convert this into a logical shr, even if the shift amount is
+    // variable.  The low bit of the shift cannot be an input sign bit unless
+    // the shift amount is >= the size of the datatype, which is undefined.
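+    // For example, (and (sra X, C), 1) computes the same bit as
+    // (and (srl X, C), 1) for any in-range shift amount C.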
+    if (DemandedMask == 1)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+                                               Op.getOperand(0), Op.getOperand(1)));
+
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      EVT VT = Op.getValueType();
+      unsigned ShAmt = SA->getZExtValue();
+      
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        break;
+
+      APInt InDemandedMask = (NewMask << ShAmt);
+
+      // If any of the demanded bits are produced by the sign extension, we also
+      // demand the input sign bit.
+      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+      if (HighBits.intersects(NewMask))
+        InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
+      
+      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero = KnownZero.lshr(ShAmt);
+      KnownOne  = KnownOne.lshr(ShAmt);
+      
+      // Handle the sign bit, adjusted to where it is now in the mask.
+      APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+      
+      // If the input sign bit is known to be zero, or if none of the top bits
+      // are demanded, turn this into an unsigned shift right.
+      if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, 
+                                                 Op.getOperand(0),
+                                                 Op.getOperand(1)));
+      } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
+        KnownOne |= HighBits;
+      }
+    }
+    break;
+  case ISD::SIGN_EXTEND_INREG: {
+    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+    // Sign extension.  Compute the demanded bits in the result that are not 
+    // present in the input.
+    APInt NewBits =
+      APInt::getHighBitsSet(BitWidth,
+                            BitWidth - ExtVT.getScalarType().getSizeInBits()) &
+      NewMask;
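+    // For example, a sext_inreg from i8 in an i32 value replicates bit 7
+    // into bits 8-31, so NewBits is the demanded subset of those top 24 bits.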
+    
+    // If none of the extended bits are demanded, eliminate the sextinreg.
+    if (NewBits == 0)
+      return TLO.CombineTo(Op, Op.getOperand(0));
+
+    APInt InSignBit =
+      APInt::getSignBit(ExtVT.getScalarType().getSizeInBits());
+    InSignBit.zext(BitWidth);
+    APInt InputDemandedBits =
+      APInt::getLowBitsSet(BitWidth,
+                           ExtVT.getScalarType().getSizeInBits()) &
+      NewMask;
+    
+    // Since the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    InputDemandedBits |= InSignBit;
+
+    if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+    
+    // If the input sign bit is known zero, convert this into a zero extension.
+    if (KnownZero.intersects(InSignBit))
+      return TLO.CombineTo(Op,
+                           TLO.DAG.getZeroExtendInReg(Op.getOperand(0), dl,
+                                                      ExtVT));
+    
+    if (KnownOne.intersects(InSignBit)) {    // Input sign bit known set
+      KnownOne |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                       // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne &= ~NewBits;
+    }
+    break;
+  }
+  case ISD::ZERO_EXTEND: {
+    unsigned OperandBitWidth =
+      Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+    APInt InMask = NewMask;
+    InMask.trunc(OperandBitWidth);
+    
+    // If none of the top bits are demanded, convert this into an any_extend.
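+    // For example, if only the low 16 bits of (zext X:i16 to i32) are
+    // demanded, the high bits are irrelevant and an any_extend suffices.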
+    APInt NewBits =
+      APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+    if (!NewBits.intersects(NewMask))
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+                                               Op.getValueType(), 
+                                               Op.getOperand(0)));
+    
+    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    KnownZero |= NewBits;
+    break;
+  }
+  case ISD::SIGN_EXTEND: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
+    APInt InMask    = APInt::getLowBitsSet(BitWidth, InBits);
+    APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+    APInt NewBits   = ~InMask & NewMask;
+    
+    // If none of the top bits are demanded, convert this into an any_extend.
+    if (NewBits == 0)
+      return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+                                              Op.getValueType(),
+                                              Op.getOperand(0)));
+    
+    // Since some of the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    APInt InDemandedBits = InMask & NewMask;
+    InDemandedBits |= InSignBit;
+    InDemandedBits.trunc(InBits);
+    
+    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    
+    // If the sign bit is known zero, convert this to a zero extend.
+    if (KnownZero.intersects(InSignBit))
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+                                               Op.getValueType(), 
+                                               Op.getOperand(0)));
+    
+    // If the sign bit is known one, the top bits match.
+    if (KnownOne.intersects(InSignBit)) {
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {   // Otherwise, top bits aren't known.
+      KnownOne  &= ~NewBits;
+      KnownZero &= ~NewBits;
+    }
+    break;
+  }
+  case ISD::ANY_EXTEND: {
+    unsigned OperandBitWidth =
+      Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+    APInt InMask = NewMask;
+    InMask.trunc(OperandBitWidth);
+    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    break;
+  }
+  case ISD::TRUNCATE: {
+    // Simplify the input, using demanded bit information, and compute the known
+    // zero/one bits live out.
+    APInt TruncMask = NewMask;
+    TruncMask.zext(Op.getOperand(0).getValueSizeInBits());
+    if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    KnownZero.trunc(BitWidth);
+    KnownOne.trunc(BitWidth);
+    
+    // If the input is only used by this truncate, see if we can shrink it based
+    // on the known demanded bits.
+    if (Op.getOperand(0).getNode()->hasOneUse()) {
+      SDValue In = Op.getOperand(0);
+      unsigned InBitWidth = In.getValueSizeInBits();
+      switch (In.getOpcode()) {
+      default: break;
+      case ISD::SRL:
+        // Shrink SRL by a constant if none of the high bits shifted in are
+        // demanded.
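+        // For example, (trunc (srl X:i64, 8)) to i32, when the top 8 bits of
+        // the result are not demanded, becomes (srl (trunc X to i32), 8).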
+        if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
+          APInt HighBits = APInt::getHighBitsSet(InBitWidth,
+                                                 InBitWidth - BitWidth);
+          HighBits = HighBits.lshr(ShAmt->getZExtValue());
+          HighBits.trunc(BitWidth);
+          
+          if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+            // None of the shifted in bits are needed.  Add a truncate of the
+            // shift input, then shift it.
+            SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+                                                 Op.getValueType(), 
+                                                 In.getOperand(0));
+            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+                                                     Op.getValueType(),
+                                                     NewTrunc, 
+                                                     In.getOperand(1)));
+          }
+        }
+        break;
+      }
+    }
+    
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    break;
+  }
+  case ISD::AssertZext: {
+    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    APInt InMask = APInt::getLowBitsSet(BitWidth,
+                                        VT.getSizeInBits());
+    if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    KnownZero |= ~InMask & NewMask;
+    break;
+  }
+  case ISD::BIT_CONVERT:
+#if 0
+    // If this is an FP->Int bitcast and if the sign bit is the only thing that
+    // is demanded, turn this into a FGETSIGN.
+    if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) &&
+        MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
+        !MVT::isVector(Op.getOperand(0).getValueType())) {
+      // Only do this xform if FGETSIGN is valid or if before legalize.
+      if (!TLO.AfterLegalize ||
+          isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
+        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+        // place.  We expect the SHL to be eliminated by other optimizations.
+        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), 
+                                         Op.getOperand(0));
+        unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+        SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
+                                                 Sign, ShAmt));
+      }
+    }
+#endif
+    break;
+  case ISD::ADD:
+  case ISD::MUL:
+  case ISD::SUB: {
+    // Add, Sub, and Mul don't demand any bits in positions beyond that
+    // of the highest bit demanded of them.
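+    // For example, if only the low 8 bits of an add are demanded, carries
+    // out of bit 7 can never affect them, so only the low 8 bits of each
+    // operand are demanded.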
+    APInt LoMask = APInt::getLowBitsSet(BitWidth,
+                                        BitWidth - NewMask.countLeadingZeros());
+    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    // See if the operation should be performed at a smaller bit width.
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+  }
+  // FALL THROUGH
+  default:
+    // Just use ComputeMaskedBits to compute output bits.
+    TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+    break;
+  }
+  
+  // If we know the value of all of the demanded bits, return this as a
+  // constant.
+  if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+  
+  return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified 
+/// in Mask are known to be either zero or one and return them in the 
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 
+                                                    const APInt &Mask,
+                                                    APInt &KnownZero, 
+                                                    APInt &KnownOne,
+                                                    const SelectionDAG &DAG,
+                                                    unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use MaskedValueIsZero if you don't know whether Op"
+         " is a target node!");
+  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                         unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use ComputeNumSignBits if you don't know whether Op"
+         " is a target node!");
+  return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+  // A left-shift of a constant one will have exactly one bit set, because
+  // shifting the bit off the end is undefined.
+  if (Val.getOpcode() == ISD::SHL)
+    if (ConstantSDNode *C =
+         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+      if (C->getAPIntValue() == 1)
+        return true;
+
+  // Similarly, a right-shift of a constant sign-bit will have exactly
+  // one bit set.
+  if (Val.getOpcode() == ISD::SRL)
+    if (ConstantSDNode *C =
+         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+      if (C->getAPIntValue().isSignBit())
+        return true;
+
+  // More could be done here, though the above checks are enough
+  // to handle some common cases.
+
+  // Fall back to ComputeMaskedBits to catch other known cases.
+  EVT OpVT = Val.getValueType();
+  unsigned BitWidth = OpVT.getSizeInBits();
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero, KnownOne;
+  DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+  return (KnownZero.countPopulation() == BitWidth - 1) &&
+         (KnownOne.countPopulation() == 1);
+}
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands 
+/// and cc. If it is unable to simplify it, return a null SDValue.
+SDValue
+TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+                              ISD::CondCode Cond, bool foldBooleans,
+                              DAGCombinerInfo &DCI, DebugLoc dl) const {
+  SelectionDAG &DAG = DCI.DAG;
+  LLVMContext &Context = *DAG.getContext();
+
+  // These setcc operations always fold.
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
+  }
+
+  if (isa<ConstantSDNode>(N0.getNode())) {
+    // Ensure that the constant occurs on the RHS, and fold constant
+    // comparisons.
+    return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+  }
+
+  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+    const APInt &C1 = N1C->getAPIntValue();
+
+    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+    // equality comparison, then we're just comparing whether X itself is
+    // zero.
+    if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+        N0.getOperand(1).getOpcode() == ISD::Constant) {
+      const APInt &ShAmt
+        = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+          ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+        if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+          // (srl (ctlz x), 5) == 0  -> X != 0
+          // (srl (ctlz x), 5) != 1  -> X != 0
+          Cond = ISD::SETNE;
+        } else {
+          // (srl (ctlz x), 5) != 0  -> X == 0
+          // (srl (ctlz x), 5) == 1  -> X == 0
+          Cond = ISD::SETEQ;
+        }
+        SDValue Zero = DAG.getConstant(0, N0.getValueType());
+        return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+                            Zero, Cond);
+      }
+    }
+
+    // If the LHS is '(and load, const)', the RHS is 0,
+    // the test is for equality or unsigned, and all 1 bits of the const are
+    // in the same partial word, see if we can shorten the load.
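+    // For example, (seteq (and (load i32 p), 0xFF00), 0) on a little-endian
+    // target can be done as (seteq (and (load i8 p+1), 0xFF), 0).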
+    if (DCI.isBeforeLegalize() &&
+        N0.getOpcode() == ISD::AND && C1 == 0 &&
+        N0.getNode()->hasOneUse() &&
+        isa<LoadSDNode>(N0.getOperand(0)) &&
+        N0.getOperand(0).getNode()->hasOneUse() &&
+        isa<ConstantSDNode>(N0.getOperand(1))) {
+      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+      APInt bestMask;
+      unsigned bestWidth = 0, bestOffset = 0;
+      if (!Lod->isVolatile() && Lod->isUnindexed()) {
+        unsigned origWidth = N0.getValueType().getSizeInBits();
+        unsigned maskWidth = origWidth;
+        // We can narrow (e.g.) 16-bit extending loads on 32-bit targets to
+        // 8 bits, but have to be careful...
+        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+          origWidth = Lod->getMemoryVT().getSizeInBits();
+        const APInt &Mask =
+          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+        for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+          for (unsigned offset=0; offset<origWidth/width; offset++) {
+            if ((newMask & Mask) == Mask) {
+              if (!TD->isLittleEndian())
+                bestOffset = (origWidth/width - offset - 1) * (width/8);
+              else
+                bestOffset = (uint64_t)offset * (width/8);
+              bestMask = Mask.lshr(offset * (width/8) * 8);
+              bestWidth = width;
+              break;
+            }
+            newMask = newMask << width;
+          }
+        }
+      }
+      if (bestWidth) {
+        EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+        if (newVT.isRound()) {
+          EVT PtrType = Lod->getOperand(1).getValueType();
+          SDValue Ptr = Lod->getBasePtr();
+          if (bestOffset != 0)
+            Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+                              DAG.getConstant(bestOffset, PtrType));
+          unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+          SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+                                        Lod->getSrcValue(), 
+                                        Lod->getSrcValueOffset() + bestOffset,
+                                        false, NewAlign);
+          return DAG.getSetCC(dl, VT, 
+                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+                                      DAG.getConstant(bestMask.trunc(bestWidth),
+                                                      newVT)),
+                              DAG.getConstant(0LL, newVT), Cond);
+        }
+      }
+    }
+
+    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+      unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+      // If the comparison constant has bits in the upper part, the
+      // zero-extended value could never match.
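+      // For example, (seteq (zext X:i8 to i32), 256) is always false, and
+      // the corresponding setne is always true.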
+      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+                                              C1.getBitWidth() - InSize))) {
+        switch (Cond) {
+        case ISD::SETUGT:
+        case ISD::SETUGE:
+        case ISD::SETEQ: return DAG.getConstant(0, VT);
+        case ISD::SETULT:
+        case ISD::SETULE:
+        case ISD::SETNE: return DAG.getConstant(1, VT);
+        case ISD::SETGT:
+        case ISD::SETGE:
+          // True if the sign bit of C1 is set.
+          return DAG.getConstant(C1.isNegative(), VT);
+        case ISD::SETLT:
+        case ISD::SETLE:
+          // True if the sign bit of C1 isn't set.
+          return DAG.getConstant(C1.isNonNegative(), VT);
+        default:
+          break;
+        }
+      }
+
+      // Otherwise, we can perform the comparison with the low bits.
+      switch (Cond) {
+      case ISD::SETEQ:
+      case ISD::SETNE:
+      case ISD::SETUGT:
+      case ISD::SETUGE:
+      case ISD::SETULT:
+      case ISD::SETULE: {
+        EVT newVT = N0.getOperand(0).getValueType();
+        if (DCI.isBeforeLegalizeOps() ||
+            (isOperationLegal(ISD::SETCC, newVT) &&
+              getCondCodeAction(Cond, newVT)==Legal))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                              DAG.getConstant(APInt(C1).trunc(InSize), newVT),
+                              Cond);
+        break;
+      }
+      default:
+        break;   // TODO: Be more careful with signed comparisons.
+      }
+    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+      EVT ExtDstTy = N0.getValueType();
+      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+      // If the extended part has any inconsistent bits, it cannot ever
+      // compare equal.  In other words, they have to be all ones or all
+      // zeros.
+      APInt ExtBits =
+        APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
+      if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+        return DAG.getConstant(Cond == ISD::SETNE, VT);
+      
+      SDValue ZextOp;
+      EVT Op0Ty = N0.getOperand(0).getValueType();
+      if (Op0Ty == ExtSrcTy) {
+        ZextOp = N0.getOperand(0);
+      } else {
+        APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+        ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+                              DAG.getConstant(Imm, Op0Ty));
+      }
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(ZextOp.getNode());
+      // Otherwise, make this a use of a zext.
+      return DAG.getSetCC(dl, VT, ZextOp, 
+                          DAG.getConstant(C1 & APInt::getLowBitsSet(
+                                                              ExtDstTyBits,
+                                                              ExtSrcTyBits), 
+                                          ExtDstTy),
+                          Cond);
+    } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+      
+      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
+      if (N0.getOpcode() == ISD::SETCC) {
+        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+        if (TrueWhenTrue)
+          return N0;
+        
+        // Invert the condition.
+        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+        CC = ISD::getSetCCInverse(CC, 
+                                  N0.getOperand(0).getValueType().isInteger());
+        return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+      }
+      
+      if ((N0.getOpcode() == ISD::XOR ||
+            (N0.getOpcode() == ISD::AND && 
+            N0.getOperand(0).getOpcode() == ISD::XOR &&
+            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+          isa<ConstantSDNode>(N0.getOperand(1)) &&
+          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
+        // can only do this if the top bits are known zero.
+        unsigned BitWidth = N0.getValueSizeInBits();
+        if (DAG.MaskedValueIsZero(N0,
+                                  APInt::getHighBitsSet(BitWidth,
+                                                        BitWidth-1))) {
+          // Okay, get the un-inverted input value.
+          SDValue Val;
+          if (N0.getOpcode() == ISD::XOR)
+            Val = N0.getOperand(0);
+          else {
+            assert(N0.getOpcode() == ISD::AND && 
+                    N0.getOperand(0).getOpcode() == ISD::XOR);
+            // ((X^1)&1)^1 -> X & 1
+            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+                              N0.getOperand(0).getOperand(0),
+                              N0.getOperand(1));
+          }
+          return DAG.getSetCC(dl, VT, Val, N1,
+                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+        }
+      }
+    }
+    
+    APInt MinVal, MaxVal;
+    unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+    if (ISD::isSignedIntSetCC(Cond)) {
+      MinVal = APInt::getSignedMinValue(OperandBitSize);
+      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+    } else {
+      MinVal = APInt::getMinValue(OperandBitSize);
+      MaxVal = APInt::getMaxValue(OperandBitSize);
+    }
+
+    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+      if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
+      // X >= C0 --> X > (C0-1)
+      return DAG.getSetCC(dl, VT, N0, 
+                          DAG.getConstant(C1-1, N1.getValueType()),
+                          (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+    }
+
+    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+      if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
+      // X <= C0 --> X < (C0+1)
+      return DAG.getSetCC(dl, VT, N0, 
+                          DAG.getConstant(C1+1, N1.getValueType()),
+                          (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+    }
+
+    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+      return DAG.getConstant(0, VT);      // X < MIN --> false
+    if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+      return DAG.getConstant(1, VT);      // X >= MIN --> true
+    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+      return DAG.getConstant(0, VT);      // X > MAX --> false
+    if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+      return DAG.getConstant(1, VT);      // X <= MAX --> true
+
+    // Canonicalize setgt X, Min --> setne X, Min
+    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+    // Canonicalize setlt X, Max --> setne X, Max
+    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+    // If we have setult X, 1, turn it into seteq X, 0
+    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+      return DAG.getSetCC(dl, VT, N0, 
+                          DAG.getConstant(MinVal, N0.getValueType()), 
+                          ISD::SETEQ);
+    // If we have setugt X, Max-1, turn it into seteq X, Max
+    else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+      return DAG.getSetCC(dl, VT, N0, 
+                          DAG.getConstant(MaxVal, N0.getValueType()),
+                          ISD::SETEQ);
+
+    // If we have "setcc X, C0", check to see if we can shrink the immediate
+    // by changing cc.
+
+    // SETUGT X, SINTMAX  -> SETLT X, 0
+    if (Cond == ISD::SETUGT && 
+        C1 == APInt::getSignedMaxValue(OperandBitSize))
+      return DAG.getSetCC(dl, VT, N0, 
+                          DAG.getConstant(0, N1.getValueType()),
+                          ISD::SETLT);
+
+    // SETULT X, SINTMIN  -> SETGT X, -1
+    if (Cond == ISD::SETULT &&
+        C1 == APInt::getSignedMinValue(OperandBitSize)) {
+      SDValue ConstMinusOne =
+          DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+                          N1.getValueType());
+      return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+    }
+
+    // Fold bit comparisons when we can.
+    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+        (VT == N0.getValueType() ||
+         (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+        N0.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS =
+                  dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+        EVT ShiftTy = DCI.isBeforeLegalize() ?
+          getPointerTy() : getShiftAmountTy();
+        if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
+          // Perform the xform if the AND RHS is a single bit.
+          if (AndRHS->getAPIntValue().isPowerOf2()) {
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                               DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+                                           DAG.getConstant(
+                                             AndRHS->getAPIntValue().logBase2(),
+                                             ShiftTy)));
+          }
+        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+          // (X & 8) == 8  -->  (X & 8) >> 3
+          // Perform the xform if C1 is a single bit.
+          if (C1.isPowerOf2()) {
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                               DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+                                      DAG.getConstant(C1.logBase2(), ShiftTy)));
+          }
+        }
+      }
+  }
+
+  if (isa<ConstantFPSDNode>(N0.getNode())) {
+    // Constant fold or commute setcc.
+    SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+    if (O.getNode()) return O;
+  } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+    // If the RHS of an FP comparison is a constant, simplify it away in
+    // some cases.
+    if (CFP->getValueAPF().isNaN()) {
+      // If an operand is known to be a nan, we can fold it.
+      switch (ISD::getUnorderedFlavor(Cond)) {
+      default: llvm_unreachable("Unknown flavor!");
+      case 0:  // Known false.
+        return DAG.getConstant(0, VT);
+      case 1:  // Known true.
+        return DAG.getConstant(1, VT);
+      case 2:  // Undefined.
+        return DAG.getUNDEF(VT);
+      }
+    }
+    
+    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
+    // constant if knowing that the operand is non-nan is enough.  We prefer to
+    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+    // materialize 0.0.
+    if (Cond == ISD::SETO || Cond == ISD::SETUO)
+      return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+    // If the condition is not legal, see if we can find an equivalent one
+    // which is legal.
+    if (!isCondCodeLegal(Cond, N0.getValueType())) {
+      // If the comparison was an awkward floating-point == or != and one of
+      // the comparison operands is infinity or negative infinity, convert the
+      // condition to a less-awkward <= or >=.
+      if (CFP->getValueAPF().isInfinity()) {
+        if (CFP->getValueAPF().isNegative()) {
+          if (Cond == ISD::SETOEQ &&
+              isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+          if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETULE, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+          if (Cond == ISD::SETUNE &&
+              isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+          if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETOGT, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+        } else {
+          if (Cond == ISD::SETOEQ &&
+              isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+          if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETUGE, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+          if (Cond == ISD::SETUNE &&
+              isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+          if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETOLT, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+        }
+      }
+    }
+  }
+
+  if (N0 == N1) {
+    // We can always fold X == X for integer setcc's.
+    if (N0.getValueType().isInteger())
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    unsigned UOF = ISD::getUnorderedFlavor(Cond);
+    if (UOF == 2)   // FP operators that are undefined on NaNs.
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+      return DAG.getConstant(UOF, VT);
+    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
+    // if it is not already.
+    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+    if (NewCond != Cond)
+      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+  }
+
+  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+      N0.getValueType().isInteger()) {
+    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+        N0.getOpcode() == ISD::XOR) {
+      // Simplify (X+Y) == (X+Z) -->  Y == Z
+      if (N0.getOpcode() == N1.getOpcode()) {
+        if (N0.getOperand(0) == N1.getOperand(0))
+          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+        if (N0.getOperand(1) == N1.getOperand(1))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+        if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+          // If X op Y == Y op X, try other combinations.
+          if (N0.getOperand(0) == N1.getOperand(1))
+            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), 
+                                Cond);
+          if (N0.getOperand(1) == N1.getOperand(0))
+            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), 
+                                Cond);
+        }
+      }
+      
+      if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+        if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+          // Turn (X+C1) == C2 --> X == C2-C1
+          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+            return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                                DAG.getConstant(RHSC->getAPIntValue()-
+                                                LHSR->getAPIntValue(),
+                                N0.getValueType()), Cond);
+          }
+          
+          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+          if (N0.getOpcode() == ISD::XOR)
+            // If we know that all of the inverted bits are zero, don't bother
+            // performing the inversion.
+            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+              return
+                DAG.getSetCC(dl, VT, N0.getOperand(0),
+                             DAG.getConstant(LHSR->getAPIntValue() ^
+                                               RHSC->getAPIntValue(),
+                                             N0.getValueType()),
+                             Cond);
+        }
+        
+        // Turn (C1-X) == C2 --> X == C1-C2
+        if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+            return
+              DAG.getSetCC(dl, VT, N0.getOperand(1),
+                           DAG.getConstant(SUBC->getAPIntValue() -
+                                             RHSC->getAPIntValue(),
+                                           N0.getValueType()),
+                           Cond);
+          }
+        }          
+      }
+
+      // Simplify (X+Z) == X -->  Z == 0
+      if (N0.getOperand(0) == N1)
+        return DAG.getSetCC(dl, VT, N0.getOperand(1),
+                        DAG.getConstant(0, N0.getValueType()), Cond);
+      if (N0.getOperand(1) == N1) {
+        if (DAG.isCommutativeBinOp(N0.getOpcode()))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                          DAG.getConstant(0, N0.getValueType()), Cond);
+        else if (N0.getNode()->hasOneUse()) {
+          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // (Z-X) == X  --> Z == X<<1
+          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
+                                     N1, 
+                                     DAG.getConstant(1, getShiftAmountTy()));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.getNode());
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+        }
+      }
+    }
+
+    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+        N1.getOpcode() == ISD::XOR) {
+      // Simplify  X == (X+Z) -->  Z == 0
+      if (N1.getOperand(0) == N0) {
+        return DAG.getSetCC(dl, VT, N1.getOperand(1),
+                        DAG.getConstant(0, N1.getValueType()), Cond);
+      } else if (N1.getOperand(1) == N0) {
+        if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+          return DAG.getSetCC(dl, VT, N1.getOperand(0),
+                          DAG.getConstant(0, N1.getValueType()), Cond);
+        } else if (N1.getNode()->hasOneUse()) {
+          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // X == (Z-X)  --> X<<1 == Z
+          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, 
+                                     DAG.getConstant(1, getShiftAmountTy()));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.getNode());
+          return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+        }
+      }
+    }
+
+    // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+    // Note that where y is variable and is known to have at most
+    // one bit set (for example, if it is z&1) we cannot do this;
+    // the expressions are not equivalent when y==0.
+    if (N0.getOpcode() == ISD::AND)
+      if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+        if (ValueHasExactlyOneBitSet(N1, DAG)) {
+          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+          SDValue Zero = DAG.getConstant(0, N1.getValueType());
+          return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+        }
+      }
+    if (N1.getOpcode() == ISD::AND)
+      if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+        if (ValueHasExactlyOneBitSet(N0, DAG)) {
+          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+          SDValue Zero = DAG.getConstant(0, N0.getValueType());
+          return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+        }
+      }
+  }
+
+  // Fold away ALL boolean setcc's.
+  SDValue Temp;
+  if (N0.getValueType() == MVT::i1 && foldBooleans) {
+    switch (Cond) {
+    default: llvm_unreachable("Unknown integer setcc!");
+    case ISD::SETEQ:  // X == Y  -> ~(X^Y)
+      Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+      N0 = DAG.getNOT(dl, Temp, MVT::i1);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETNE:  // X != Y   -->  (X^Y)
+      N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+      break;
+    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
+    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
+      Temp = DAG.getNOT(dl, N0, MVT::i1);
+      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
+    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
+      Temp = DAG.getNOT(dl, N1, MVT::i1);
+      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
+    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
+      Temp = DAG.getNOT(dl, N0, MVT::i1);
+      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
+    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
+      Temp = DAG.getNOT(dl, N1, MVT::i1);
+      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    }
+    if (VT != MVT::i1) {
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(N0.getNode());
+      // FIXME: If running after legalize, we probably can't do this.
+      N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+    }
+    return N0;
+  }
+
+  // Could not fold it.
+  return SDValue();
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
+                                    int64_t &Offset) const {
+  if (isa<GlobalAddressSDNode>(N)) {
+    GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+    GA = GASD->getGlobal();
+    Offset += GASD->getOffset();
+    return true;
+  }
+
+  if (N->getOpcode() == ISD::ADD) {
+    SDValue N1 = N->getOperand(0);
+    SDValue N2 = N->getOperand(1);
+    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+      if (V) {
+        Offset += V->getSExtValue();
+        return true;
+      }
+    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+      if (V) {
+        Offset += V->getSExtValue();
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+  // Default implementation: no optimization.
+  return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+//  Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+  // FIXME: lots more standard ones to handle.
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default: break;
+    case 'r': return C_RegisterClass;
+    case 'm':    // memory
+    case 'o':    // offsetable
+    case 'V':    // not offsetable
+      return C_Memory;
+    case 'i':    // Simple Integer or Relocatable Constant
+    case 'n':    // Simple Integer
+    case 's':    // Relocatable Constant
+    case 'X':    // Allow ANY value.
+    case 'I':    // Target-specific constant constraints ('I'..'P').
+    case 'J':
+    case 'K':
+    case 'L':
+    case 'M':
+    case 'N':
+    case 'O':
+    case 'P':
+      return C_Other;
+    }
+  }
+  
+  if (Constraint.size() > 1 && Constraint[0] == '{' && 
+      Constraint[Constraint.size()-1] == '}')
+    return C_Register;
+  return C_Unknown;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+  if (ConstraintVT.isInteger())
+    return "r";
+  if (ConstraintVT.isFloatingPoint())
+    return "f";      // works for many targets
+  return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector.  If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+                                                  char ConstraintLetter,
+                                                  bool hasMemory,
+                                                  std::vector<SDValue> &Ops,
+                                                  SelectionDAG &DAG) const {
+  switch (ConstraintLetter) {
+  default: break;
+  case 'X':     // Allows any operand; labels (basic block) use this.
+    if (Op.getOpcode() == ISD::BasicBlock) {
+      Ops.push_back(Op);
+      return;
+    }
+    // fall through
+  case 'i':    // Simple Integer or Relocatable Constant
+  case 'n':    // Simple Integer
+  case 's': {  // Relocatable Constant
+    // These operands are interested in values of the form (GV+C), where C may
+    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
+    // is possible and fine if either GV or C are missing.
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+    
+    // If we have "(add GV, C)", pull out GV/C
+    if (Op.getOpcode() == ISD::ADD) {
+      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+      if (C == 0 || GA == 0) {
+        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+      }
+      if (C == 0 || GA == 0)
+        C = 0, GA = 0;
+    }
+    
+    // If we find a valid operand, map to the TargetXXX version so that the
+    // value itself doesn't get selected.
+    if (GA) {   // Either &GV   or   &GV+C
+      if (ConstraintLetter != 'n') {
+        int64_t Offs = GA->getOffset();
+        if (C) Offs += C->getZExtValue();
+        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                                 Op.getValueType(), Offs));
+        return;
+      }
+    }
+    if (C) {   // just C, no GV.
+      // Simple constants are not allowed for 's'.
+      if (ConstraintLetter != 's') {
+        // gcc prints these as sign extended.  Sign extend value to 64 bits
+        // now; without this it would get ZExt'd later in
+        // ScheduleDAGSDNodes::EmitNode, which is very generic.
+        Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+                                            MVT::i64));
+        return;
+      }
+    }
+    break;
+  }
+  }
+}
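+
+// As an illustration of the 'i'/'s' handling above: given the operand
+// (add (GlobalAddress @g), (Constant 16)) and constraint letter 's', the GV/C
+// pair is folded into a single TargetGlobalAddress(@g, +16) operand, so the
+// address arithmetic is never selected as real instructions.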
+
+/// getRegClassForInlineAsmConstraint - Given a constraint, return a list of
+/// registers that can be used to satisfy it; this default implementation
+/// returns an empty list and expects targets to override it.
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  EVT VT) const {
+  return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+                             EVT VT) const {
+  if (Constraint[0] != '{')
+    return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+  // Remove the braces from around the name.
+  StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+  // Figure out which register class contains this reg.
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+       E = RI->regclass_end(); RCI != E; ++RCI) {
+    const TargetRegisterClass *RC = *RCI;
+    
+    // If none of the value types for this register class are valid, we 
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    bool isLegal = false;
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (isTypeLegal(*I)) {
+        isLegal = true;
+        break;
+      }
+    }
+    
+    if (!isLegal) continue;
+    
+    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); 
+         I != E; ++I) {
+      if (RegName.equals_lower(RI->getName(*I)))
+        return std::make_pair(*I, RC);
+    }
+  }
+  
+  return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
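+
+// For instance, on an x86 target the constraint "{eax}" strips to "eax",
+// matches the register EAX by case-insensitive name lookup, and returns that
+// register paired with its 32-bit register class; an unknown or illegal name
+// falls through to the (0, 0) pair.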
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+//===----------------------------------------------------------------------===//
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+  assert(!ConstraintCode.empty() && "No known constraint!");
+  return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+  assert(!ConstraintCode.empty() && "No known constraint!");
+  return atoi(ConstraintCode.c_str());
+}
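+
+// E.g. for an input operand whose constraint string is "0",
+// isMatchingInputConstraint() returns true and getMatchedOperand() returns 0:
+// the operand is tied to output operand 0.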
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+  switch (CT) {
+  default: llvm_unreachable("Unknown constraint type!");
+  case TargetLowering::C_Other:
+  case TargetLowering::C_Unknown:
+    return 0;
+  case TargetLowering::C_Register:
+    return 1;
+  case TargetLowering::C_RegisterClass:
+    return 2;
+  case TargetLowering::C_Memory:
+    return 3;
+  }
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+///    Other         -> immediates and magic values
+///    Register      -> one specific register
+///    RegisterClass -> a group of regs
+///    Memory        -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it.  The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, then choosing the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory.  Because of
+/// this, the heuristic we use is:
+///
+///  1) If there is an 'other' constraint, and if the operand is valid for
+///     that constraint, use it.  This makes us take advantage of 'i'
+///     constraints when available.
+///  2) Otherwise, pick the most general constraint present.  This prefers
+///     'm' over 'r', for example.
+///
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+                             bool hasMemory,  const TargetLowering &TLI,
+                             SDValue Op, SelectionDAG *DAG) {
+  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+  unsigned BestIdx = 0;
+  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+  int BestGenerality = -1;
+  
+  // Loop over the options, keeping track of the most general one.
+  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+    TargetLowering::ConstraintType CType =
+      TLI.getConstraintType(OpInfo.Codes[i]);
+    
+    // If this is an 'other' constraint, see if the operand is valid for it.
+    // For example, on X86 we might have an 'rI' constraint.  If the operand
+    // is an integer in the range [0..31] we want to use I (saving a load
+    // of a register), otherwise we must use 'r'.
+    if (CType == TargetLowering::C_Other && Op.getNode()) {
+      assert(OpInfo.Codes[i].size() == 1 &&
+             "Unhandled multi-letter 'other' constraint");
+      std::vector<SDValue> ResultOps;
+      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory,
+                                       ResultOps, *DAG);
+      if (!ResultOps.empty()) {
+        BestType = CType;
+        BestIdx = i;
+        break;
+      }
+    }
+    
+    // This constraint letter is more general than the previous one, use it.
+    int Generality = getConstraintGenerality(CType);
+    if (Generality > BestGenerality) {
+      BestType = CType;
+      BestIdx = i;
+      BestGenerality = Generality;
+    }
+  }
+  
+  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+  OpInfo.ConstraintType = BestType;
+}
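+
+// Worked example: for an "imr" constraint on the constant operand 42, 'i'
+// classifies as C_Other and LowerAsmOperandForConstraint accepts the
+// immediate, so rule 1 picks 'i' and the loop exits early.  For a variable
+// operand no C_Other code lowers, so rule 2 picks the most general remaining
+// option: 'm' (C_Memory, generality 3) wins over 'r' (C_RegisterClass,
+// generality 2).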
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+                                            SDValue Op, 
+                                            bool hasMemory,
+                                            SelectionDAG *DAG) const {
+  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+  
+  // Single-letter constraints ('r') are very common.
+  if (OpInfo.Codes.size() == 1) {
+    OpInfo.ConstraintCode = OpInfo.Codes[0];
+    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+  } else {
+    ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
+  }
+  
+  // 'X' matches anything.
+  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+    // Labels and constants are handled elsewhere ('X' is the only thing
+    // that matches labels).  For Functions, the type here is the type of
+    // the result, which is not what we want to look at; leave them alone.
+    Value *v = OpInfo.CallOperandVal;
+    if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v))
+      return;
+    
+    // Otherwise, try to resolve it to something we know about by looking at
+    // the actual operand type.
+    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+      OpInfo.ConstraintCode = Repl;
+      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+    }
+  }
+}
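+
+// E.g. an "X" constraint on an i32 variable is rewritten to "r" and on an f64
+// to "f" via LowerXConstraint, while basic blocks, integer constants, and
+// functions keep the "X" untouched.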
+
+//===----------------------------------------------------------------------===//
+//  Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, 
+                                           const Type *Ty) const {
+  // The default implementation accepts a conservative RISC-style addressing
+  // mode of the form r+r or r+i.
+
+  // Only allow offsets that roughly fit in a sign-extended 16-bit immediate
+  // field.
+  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+    return false;
+  
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+  
+  // Only support scales of 0, 1, and 2 (i.e. r+i, r+r, and 2*r).
+  switch (AM.Scale) {
+  case 0:  // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
+      return false;
+    // Otherwise we have r+r or r+i.
+    break;
+  case 2:
+    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
+      return false;
+    // Allow 2*r as r+r.
+    break;
+  default:  // No other scales are supported.
+    return false;
+  }
+  
+  return true;
+}
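+
+// Under this default implementation r+r, r+1234, and 2*r are legal, while
+// r+r+4 (Scale == 1 with both a base register and an offset), 2*r+4, and any
+// mode with a global base are rejected.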
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, 
+                                  std::vector<SDNode*>* Created) const {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  
+  // Check to see if we can do this.
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+  
+  APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+  APInt::ms magics = d.magic();
+  
+  // Multiply the numerator (operand 0) by the magic value
+  // FIXME: We should support doing a MUL in a wider type
+  SDValue Q;
+  if (isOperationLegalOrCustom(ISD::MULHS, VT))
+    Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+    Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+                              N->getOperand(0),
+                              DAG.getConstant(magics.m, VT)).getNode(), 1);
+  else
+    return SDValue();       // No mulhs or equivalent
+  // If d > 0 and m < 0, add the numerator
+  if (d.isStrictlyPositive() && magics.m.isNegative()) { 
+    Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // If d < 0 and m > 0, subtract the numerator.
+  if (d.isNegative() && magics.m.isStrictlyPositive()) {
+    Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Shift right algebraic if shift value is nonzero
+  if (magics.s > 0) {
+    Q = DAG.getNode(ISD::SRA, dl, VT, Q, 
+                    DAG.getConstant(magics.s, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Extract the sign bit and add it to the quotient
+  SDValue T =
+    DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+                                                 getShiftAmountTy()));
+  if (Created)
+    Created->push_back(T.getNode());
+  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
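+
+// Worked example (32-bit sdiv by 7; the magic constants are the standard
+// Hacker's Delight values): d.magic() yields m = 0x92492493, s = 2, giving
+//   Q = mulhs(N0, 0x92492493)   // high half of the 64-bit product
+//   Q = add(Q, N0)              // d > 0 and m < 0: add the numerator
+//   Q = sra(Q, 2)               // shift by s
+//   T = srl(Q, 31)              // extract the sign bit
+//   result = add(Q, T)          // round the quotient toward zero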
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+                                  std::vector<SDNode*>* Created) const {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Check to see if we can do this.
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  // FIXME: We should use a narrower constant when the upper
+  // bits are known to be zero.
+  ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+  APInt::mu magics = N1C->getAPIntValue().magicu();
+
+  // Multiply the numerator (operand 0) by the magic value
+  // FIXME: We should support doing a MUL in a wider type
+  SDValue Q;
+  if (isOperationLegalOrCustom(ISD::MULHU, VT))
+    Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+                              N->getOperand(0),
+                              DAG.getConstant(magics.m, VT)).getNode(), 1);
+  else
+    return SDValue();       // No mulhu or equivalent
+  if (Created)
+    Created->push_back(Q.getNode());
+
+  if (magics.a == 0) {
+    assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+           "We shouldn't generate an undefined shift!");
+    return DAG.getNode(ISD::SRL, dl, VT, Q, 
+                       DAG.getConstant(magics.s, getShiftAmountTy()));
+  } else {
+    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, 
+                      DAG.getConstant(1, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    return DAG.getNode(ISD::SRL, dl, VT, NPQ, 
+                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
+  }
+}
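+
+// Worked example (32-bit udiv by 7; standard Hacker's Delight constants):
+// magicu() yields m = 0x24924925, a = 1, s = 3, taking the NPQ path:
+//   Q   = mulhu(N0, 0x24924925)
+//   NPQ = sub(N0, Q)
+//   NPQ = srl(NPQ, 1)
+//   NPQ = add(NPQ, Q)
+//   result = srl(NPQ, 2)        // shift by s - 1
+// For udiv by 3, m = 0xAAAAAAAB with a = 0 and s = 1, so the result is just
+// srl(mulhu(N0, 0xAAAAAAAB), 1).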