It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.
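
As an illustrative sketch (the names below are assumptions, not code from
this patch): a dynamic alloca's size only needs rounding when the requested
alignment is stricter than what the stack pointer already guarantees:

    // Round up only when Align exceeds the stack alignment; a request
    // that merely matches the stack alignment needs no rounding.
    uint64_t roundAllocaSize(uint64_t Size, unsigned Align,
                             unsigned StackAlign) {
      if (Align <= StackAlign)
        return Size;
      return (Size + Align - 1) & ~uint64_t(Align - 1);
    }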


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
new file mode 100644
index 0000000..defbe34
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -0,0 +1,102 @@
+//===-- llvm/CallingConvLower.cpp - Calling Conventions -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm,
+                 SmallVector<CCValAssign, 16> &locs)
+  : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+    MRI(*TM.getRegisterInfo()), Locs(locs) {
+  // No stack is used.
+  StackOffset = 0;
+  
+  UsedRegs.resize(MRI.getNumRegs());
+}
+
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
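+/// For example, Reg == 35 sets bit 3 (35 & 31) of UsedRegs[1] (35 / 32);
+/// the alias walk below then marks any overlapping registers the same way.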
+void CCState::MarkAllocated(unsigned Reg) {
+  UsedRegs[Reg/32] |= 1 << (Reg&31);
+  
+  if (const unsigned *RegAliases = MRI.getAliasSet(Reg))
+    for (; (Reg = *RegAliases); ++RegAliases)
+      UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) {
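+  // Note: this assumes the FORMAL_ARGUMENTS node produces one value per
+  // formal plus a trailing chain (hence getNumValues()-1), and carries one
+  // flag operand per formal starting at operand 3.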
+  unsigned NumArgs = TheArgs->getNumValues()-1;
+  
+  for (unsigned i = 0; i != NumArgs; ++i) {
+    MVT::ValueType ArgVT = TheArgs->getValueType(i);
+    SDOperand FlagOp = TheArgs->getOperand(3+i);
+    unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue();
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+      cerr << "Formal argument #" << i << " has unhandled type "
+           << MVT::getValueTypeString(ArgVT) << "\n";
+      abort();
+    }
+  }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) {
+  // Determine which register each value should be copied into.
+  for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) {
+    MVT::ValueType VT = TheRet->getOperand(i*2+1).getValueType();
+    if (Fn(i, VT, VT, CCValAssign::Full,
+           cast<ConstantSDNode>(TheRet->getOperand(i*2+2))->getValue(), *this)){
+      cerr << "Return operand #" << i << " has unhandled type "
+           << MVT::getValueTypeString(VT) << "\n";
+      abort();
+    }
+  }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void CCState::AnalyzeCallOperands(SDNode *TheCall, CCAssignFn Fn) {
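+  // Note: this assumes the ISD::CALL operand layout of this era: five fixed
+  // operands (the chain plus call information) followed by one
+  // (value, flags) pair per argument, hence (getNumOperands() - 5) / 2.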
+  unsigned NumOps = (TheCall->getNumOperands() - 5) / 2;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    MVT::ValueType ArgVT = TheCall->getOperand(5+2*i).getValueType();
+    SDOperand FlagOp = TheCall->getOperand(5+2*i+1);
+    unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue();
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+      cerr << "Call operand #" << i << " has unhandled type "
+           << MVT::getValueTypeString(ArgVT) << "\n";
+      abort();
+    }
+  }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(SDNode *TheCall, CCAssignFn Fn) {
+  for (unsigned i = 0, e = TheCall->getNumValues() - 1; i != e; ++i) {
+    MVT::ValueType VT = TheCall->getValueType(i);
+    if (Fn(i, VT, VT, CCValAssign::Full, 0, *this)) {
+      cerr << "Call result #" << i << " has unhandled type "
+           << MVT::getValueTypeString(VT) << "\n";
+      abort();
+    }
+  }
+}
+
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 0000000..22c6e6b
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,4749 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
+// both before and after the DAG is legalized.
+//
+// FIXME: Missing folds
+// sdiv, udiv, srem, urem (X, const) where X is an integer can be expanded into
+//  a sequence of multiplies, shifts, and adds.  This should be controlled by
+//  some kind of hint from the target that int div is expensive.
+// various folds of mulh[s,u] by constants such as -1, powers of 2, etc.
+//
+// FIXME: select C, pow2, pow2 -> something smart
+// FIXME: trunc(select X, Y, Z) -> select X, trunc(Y), trunc(Z)
+// FIXME: Dead stores -> nuke
+// FIXME: shr X, (and Y,31) -> shr X, Y   (TRICKY!)
+// FIXME: mul (x, const) -> shifts + adds
+// FIXME: undef values
+// FIXME: divide by zero is currently left unfolded.  do we want to turn this
+//        into an undef?
+// FIXME: select ne (select cc, 1, 0), 0, true, false -> select cc, true, false
+// 
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined   , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+
+namespace {
+#ifndef NDEBUG
+  static cl::opt<bool>
+    ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+                    cl::desc("Pop up a window to show dags before the first "
+                             "dag combine pass"));
+  static cl::opt<bool>
+    ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+                    cl::desc("Pop up a window to show dags before the second "
+                             "dag combine pass"));
+#else
+  static const bool ViewDAGCombine1 = false;
+  static const bool ViewDAGCombine2 = false;
+#endif
+  
+  static cl::opt<bool>
+    CombinerAA("combiner-alias-analysis", cl::Hidden,
+               cl::desc("Turn on alias analysis during testing"));
+
+  static cl::opt<bool>
+    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+               cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+  class VISIBILITY_HIDDEN DAGCombiner {
+    SelectionDAG &DAG;
+    TargetLowering &TLI;
+    bool AfterLegalize;
+
+    // Worklist of all of the nodes that need to be simplified.
+    std::vector<SDNode*> WorkList;
+
+    // AA - Used for DAG load/store alias analysis.
+    AliasAnalysis &AA;
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users of
+    /// the instruction to the work lists because they might get more simplified
+    /// now.
+    ///
+    void AddUsersToWorkList(SDNode *N) {
+      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+           UI != UE; ++UI)
+        AddToWorkList(*UI);
+    }
+
+    /// removeFromWorkList - remove all instances of N from the worklist.
+    ///
+    void removeFromWorkList(SDNode *N) {
+      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+                     WorkList.end());
+    }
+    
+  public:
+    /// AddToWorkList - Add to the work list, making sure its instance is at
+    /// the back (next to be processed).
+    void AddToWorkList(SDNode *N) {
+      removeFromWorkList(N);
+      WorkList.push_back(N);
+    }
+
+    SDOperand CombineTo(SDNode *N, const SDOperand *To, unsigned NumTo,
+                        bool AddTo = true) {
+      assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+      ++NodesCombined;
+      DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG));
+      DOUT << "\nWith: "; DEBUG(To[0].Val->dump(&DAG));
+      DOUT << " and " << NumTo-1 << " other values\n";
+      std::vector<SDNode*> NowDead;
+      DAG.ReplaceAllUsesWith(N, To, &NowDead);
+      
+      if (AddTo) {
+        // Push the new nodes and any users onto the worklist
+        for (unsigned i = 0, e = NumTo; i != e; ++i) {
+          AddToWorkList(To[i].Val);
+          AddUsersToWorkList(To[i].Val);
+        }
+      }
+      
+      // Nodes can be reintroduced into the worklist.  Make sure we do not
+      // process a node that has been replaced.
+      removeFromWorkList(N);
+      for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+        removeFromWorkList(NowDead[i]);
+      
+      // Finally, since the node is now dead, remove it from the graph.
+      DAG.DeleteNode(N);
+      return SDOperand(N, 0);
+    }
+    
+    SDOperand CombineTo(SDNode *N, SDOperand Res, bool AddTo = true) {
+      return CombineTo(N, &Res, 1, AddTo);
+    }
+    
+    SDOperand CombineTo(SDNode *N, SDOperand Res0, SDOperand Res1,
+                        bool AddTo = true) {
+      SDOperand To[] = { Res0, Res1 };
+      return CombineTo(N, To, 2, AddTo);
+    }
+  private:    
+    
+    /// SimplifyDemandedBits - Check the specified integer node value to see if
+    /// it can be simplified or if things it uses can be simplified by bit
+    /// propagation.  If so, return true.
+    bool SimplifyDemandedBits(SDOperand Op) {
+      TargetLowering::TargetLoweringOpt TLO(DAG);
+      uint64_t KnownZero, KnownOne;
+      uint64_t Demanded = MVT::getIntVTBitMask(Op.getValueType());
+      if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+        return false;
+
+      // Revisit the node.
+      AddToWorkList(Op.Val);
+      
+      // Replace the old value with the new one.
+      ++NodesCombined;
+      DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.Val->dump(&DAG));
+      DOUT << "\nWith: "; DEBUG(TLO.New.Val->dump(&DAG));
+      DOUT << '\n';
+
+      std::vector<SDNode*> NowDead;
+      DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, NowDead);
+      
+      // Push the new node and any (possibly new) users onto the worklist.
+      AddToWorkList(TLO.New.Val);
+      AddUsersToWorkList(TLO.New.Val);
+      
+      // Nodes can end up on the worklist more than once.  Make sure we do
+      // not process a node that has been replaced.
+      for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+        removeFromWorkList(NowDead[i]);
+      
+      // Finally, if the node is now dead, remove it from the graph.  The node
+      // may not be dead if the replacement process recursively simplified to
+      // something else needing this node.
+      if (TLO.Old.Val->use_empty()) {
+        removeFromWorkList(TLO.Old.Val);
+        
+        // If the operands of this node are only used by the node, they will now
+        // be dead.  Make sure to visit them first to delete dead nodes early.
+        for (unsigned i = 0, e = TLO.Old.Val->getNumOperands(); i != e; ++i)
+          if (TLO.Old.Val->getOperand(i).Val->hasOneUse())
+            AddToWorkList(TLO.Old.Val->getOperand(i).Val);
+        
+        DAG.DeleteNode(TLO.Old.Val);
+      }
+      return true;
+    }
+
+    bool CombineToPreIndexedLoadStore(SDNode *N);
+    bool CombineToPostIndexedLoadStore(SDNode *N);
+    
+    
+    /// visit - call the node-specific routine that knows how to fold each
+    /// particular type of node.
+    SDOperand visit(SDNode *N);
+
+    // Visitation implementation - Implement dag node combining for different
+    // node types.  The semantics are as follows:
+    // Return Value:
+    //   SDOperand.Val == 0   - No change was made
+    //   SDOperand.Val == N   - N was replaced, is dead, and is already handled.
+    //   otherwise            - N should be replaced by the returned Operand.
+    //
+    SDOperand visitTokenFactor(SDNode *N);
+    SDOperand visitADD(SDNode *N);
+    SDOperand visitSUB(SDNode *N);
+    SDOperand visitADDC(SDNode *N);
+    SDOperand visitADDE(SDNode *N);
+    SDOperand visitMUL(SDNode *N);
+    SDOperand visitSDIV(SDNode *N);
+    SDOperand visitUDIV(SDNode *N);
+    SDOperand visitSREM(SDNode *N);
+    SDOperand visitUREM(SDNode *N);
+    SDOperand visitMULHU(SDNode *N);
+    SDOperand visitMULHS(SDNode *N);
+    SDOperand visitAND(SDNode *N);
+    SDOperand visitOR(SDNode *N);
+    SDOperand visitXOR(SDNode *N);
+    SDOperand SimplifyVBinOp(SDNode *N);
+    SDOperand visitSHL(SDNode *N);
+    SDOperand visitSRA(SDNode *N);
+    SDOperand visitSRL(SDNode *N);
+    SDOperand visitCTLZ(SDNode *N);
+    SDOperand visitCTTZ(SDNode *N);
+    SDOperand visitCTPOP(SDNode *N);
+    SDOperand visitSELECT(SDNode *N);
+    SDOperand visitSELECT_CC(SDNode *N);
+    SDOperand visitSETCC(SDNode *N);
+    SDOperand visitSIGN_EXTEND(SDNode *N);
+    SDOperand visitZERO_EXTEND(SDNode *N);
+    SDOperand visitANY_EXTEND(SDNode *N);
+    SDOperand visitSIGN_EXTEND_INREG(SDNode *N);
+    SDOperand visitTRUNCATE(SDNode *N);
+    SDOperand visitBIT_CONVERT(SDNode *N);
+    SDOperand visitFADD(SDNode *N);
+    SDOperand visitFSUB(SDNode *N);
+    SDOperand visitFMUL(SDNode *N);
+    SDOperand visitFDIV(SDNode *N);
+    SDOperand visitFREM(SDNode *N);
+    SDOperand visitFCOPYSIGN(SDNode *N);
+    SDOperand visitSINT_TO_FP(SDNode *N);
+    SDOperand visitUINT_TO_FP(SDNode *N);
+    SDOperand visitFP_TO_SINT(SDNode *N);
+    SDOperand visitFP_TO_UINT(SDNode *N);
+    SDOperand visitFP_ROUND(SDNode *N);
+    SDOperand visitFP_ROUND_INREG(SDNode *N);
+    SDOperand visitFP_EXTEND(SDNode *N);
+    SDOperand visitFNEG(SDNode *N);
+    SDOperand visitFABS(SDNode *N);
+    SDOperand visitBRCOND(SDNode *N);
+    SDOperand visitBR_CC(SDNode *N);
+    SDOperand visitLOAD(SDNode *N);
+    SDOperand visitSTORE(SDNode *N);
+    SDOperand visitINSERT_VECTOR_ELT(SDNode *N);
+    SDOperand visitBUILD_VECTOR(SDNode *N);
+    SDOperand visitCONCAT_VECTORS(SDNode *N);
+    SDOperand visitVECTOR_SHUFFLE(SDNode *N);
+
+    SDOperand XformToShuffleWithZero(SDNode *N);
+    SDOperand ReassociateOps(unsigned Opc, SDOperand LHS, SDOperand RHS);
+    
+    bool SimplifySelectOps(SDNode *SELECT, SDOperand LHS, SDOperand RHS);
+    SDOperand SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+    SDOperand SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2);
+    SDOperand SimplifySelectCC(SDOperand N0, SDOperand N1, SDOperand N2, 
+                               SDOperand N3, ISD::CondCode CC, 
+                               bool NotExtCompare = false);
+    SDOperand SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
+                            ISD::CondCode Cond, bool foldBooleans = true);
+    SDOperand ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT::ValueType);
+    SDOperand BuildSDIV(SDNode *N);
+    SDOperand BuildUDIV(SDNode *N);
+    SDNode *MatchRotate(SDOperand LHS, SDOperand RHS);
+    SDOperand ReduceLoadWidth(SDNode *N);
+    
+    /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+    /// looking for aliasing nodes and adding them to the Aliases vector.
+    void GatherAllAliases(SDNode *N, SDOperand OriginalChain,
+                          SmallVector<SDOperand, 8> &Aliases);
+
+    /// isAlias - Return true if there is any possibility that the two addresses
+    /// overlap.
+    bool isAlias(SDOperand Ptr1, int64_t Size1,
+                 const Value *SrcValue1, int SrcValueOffset1,
+                 SDOperand Ptr2, int64_t Size2,
+                 const Value *SrcValue2, int SrcValueOffset2);
+                 
+    /// FindAliasInfo - Extracts the relevant alias information from the memory
+    /// node.  Returns true if the operand was a load.
+    bool FindAliasInfo(SDNode *N,
+                       SDOperand &Ptr, int64_t &Size,
+                       const Value *&SrcValue, int &SrcValueOffset);
+                       
+    /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+    /// looking for a better chain (aliasing node).
+    SDOperand FindBetterChain(SDNode *N, SDOperand Chain);
+    
+  public:
+    DAGCombiner(SelectionDAG &D, AliasAnalysis &A)
+      : DAG(D),
+        TLI(D.getTargetLoweringInfo()),
+        AfterLegalize(false),
+        AA(A) {}
+    
+    /// Run - runs the dag combiner on all nodes in the work list
+    void Run(bool RunningAfterLegalize); 
+  };
+}
+
+//===----------------------------------------------------------------------===//
+//  TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+  ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+SDOperand TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDOperand> &To) {
+  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size());
+}
+
+SDOperand TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDOperand Res) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res);
+}
+
+
+SDOperand TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDOperand Res0, SDOperand Res1) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, or 2 if we
+/// can compute the negated form more cheaply than the expression itself.
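+/// For example, (fneg X) is negatible at cost 2 (the fneg simply
+/// disappears), a ConstantFP at cost 1 (negate the constant in place), and
+/// when UnsafeFPMath is set (fadd X, (fneg Y)) negates to (fsub Y, X) by
+/// recursing into the cheaply negatible operand.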
+static char isNegatibleForFree(SDOperand Op, unsigned Depth = 0) {
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG) return 2;
+  
+  // Don't allow anything with multiple uses.
+  if (!Op.hasOneUse()) return 0;
+  
+  // Don't recurse exponentially.
+  if (Depth > 6) return 0;
+  
+  switch (Op.getOpcode()) {
+  default: return 0;
+  case ISD::ConstantFP:
+    return 1;
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    if (!UnsafeFPMath) return 0;
+    
+    // -(A+B) -> -A - B
+    if (char V = isNegatibleForFree(Op.getOperand(0), Depth+1))
+      return V;
+    // -(A+B) -> -B - A
+    return isNegatibleForFree(Op.getOperand(1), Depth+1);
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros. 
+    if (!UnsafeFPMath) return 0;
+    
+    // -(A-B) -> B-A
+    return 1;
+    
+  case ISD::FMUL:
+  case ISD::FDIV:
+    if (HonorSignDependentRoundingFPMath()) return 0;
+    
+    // -(X*Y) -> (-X * Y) or (X*-Y)
+    if (char V = isNegatibleForFree(Op.getOperand(0), Depth+1))
+      return V;
+      
+    return isNegatibleForFree(Op.getOperand(1), Depth+1);
+    
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FSIN:
+    return isNegatibleForFree(Op.getOperand(0), Depth+1);
+  }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returned nonzero, this
+/// function returns the newly negated expression.
+static SDOperand GetNegatedExpression(SDOperand Op, SelectionDAG &DAG,
+                                      unsigned Depth = 0) {
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+  
+  // Don't allow anything with multiple uses.
+  assert(Op.hasOneUse() && "Unknown reuse!");
+  
+  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Unknown code");
+  case ISD::ConstantFP:
+    return DAG.getConstantFP(-cast<ConstantFPSDNode>(Op)->getValue(),
+                             Op.getValueType());
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    assert(UnsafeFPMath);
+    
+    // -(A+B) -> -A - B
+    if (isNegatibleForFree(Op.getOperand(0), Depth+1))
+      return DAG.getNode(ISD::FSUB, Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG, Depth+1),
+                         Op.getOperand(1));
+    // -(A+B) -> -B - A
+    return DAG.getNode(ISD::FSUB, Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(1), DAG, Depth+1),
+                       Op.getOperand(0));
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros. 
+    assert(UnsafeFPMath);
+
+    // -(0-B) -> B
+    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+      if (N0CFP->getValue() == 0.0)
+        return Op.getOperand(1);
+    
+    // -(A-B) -> B-A
+    return DAG.getNode(ISD::FSUB, Op.getValueType(), Op.getOperand(1),
+                       Op.getOperand(0));
+    
+  case ISD::FMUL:
+  case ISD::FDIV:
+    assert(!HonorSignDependentRoundingFPMath());
+    
+    // -(X*Y) -> -X * Y
+    if (isNegatibleForFree(Op.getOperand(0), Depth+1))
+      return DAG.getNode(Op.getOpcode(), Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG, Depth+1),
+                         Op.getOperand(1));
+      
+    // -(X*Y) -> X * -Y
+    return DAG.getNode(Op.getOpcode(), Op.getValueType(),
+                       Op.getOperand(0),
+                       GetNegatedExpression(Op.getOperand(1), DAG, Depth+1));
+    
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FSIN:
+    return DAG.getNode(Op.getOpcode(), Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(0), DAG, Depth+1));
+  }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate 
+// nodes based on the type of node we are checking.  This simplifies life a
+// bit for the callers.
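+// For example, (select_cc a, b, 1, 0, cc) computes the same value as
+// (setcc a, b, cc), so callers can treat both forms uniformly.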
+static bool isSetCCEquivalent(SDOperand N, SDOperand &LHS, SDOperand &RHS,
+                              SDOperand &CC) {
+  if (N.getOpcode() == ISD::SETCC) {
+    LHS = N.getOperand(0);
+    RHS = N.getOperand(1);
+    CC  = N.getOperand(2);
+    return true;
+  }
+  if (N.getOpcode() == ISD::SELECT_CC && 
+      N.getOperand(2).getOpcode() == ISD::Constant &&
+      N.getOperand(3).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(N.getOperand(2))->getValue() == 1 &&
+      cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+    LHS = N.getOperand(0);
+    RHS = N.getOperand(1);
+    CC  = N.getOperand(4);
+    return true;
+  }
+  return false;
+}
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use.  If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDOperand N) {
+  SDOperand N0, N1, N2;
+  if (isSetCCEquivalent(N, N0, N1, N2) && N.Val->hasOneUse())
+    return true;
+  return false;
+}
+
+SDOperand DAGCombiner::ReassociateOps(unsigned Opc, SDOperand N0, SDOperand N1){
+  MVT::ValueType VT = N0.getValueType();
+  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
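+  // For example, with Opc == ISD::ADD: (add (add x, 4), y) becomes
+  // (add (add x, y), 4), and (add (add x, 4), 8) becomes (add x, 12).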
+  if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+    if (isa<ConstantSDNode>(N1)) {
+      SDOperand OpNode = DAG.getNode(Opc, VT, N0.getOperand(1), N1);
+      AddToWorkList(OpNode.Val);
+      return DAG.getNode(Opc, VT, OpNode, N0.getOperand(0));
+    } else if (N0.hasOneUse()) {
+      SDOperand OpNode = DAG.getNode(Opc, VT, N0.getOperand(0), N1);
+      AddToWorkList(OpNode.Val);
+      return DAG.getNode(Opc, VT, OpNode, N0.getOperand(1));
+    }
+  }
+  // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+  if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+    if (isa<ConstantSDNode>(N0)) {
+      SDOperand OpNode = DAG.getNode(Opc, VT, N1.getOperand(1), N0);
+      AddToWorkList(OpNode.Val);
+      return DAG.getNode(Opc, VT, OpNode, N1.getOperand(0));
+    } else if (N1.hasOneUse()) {
+      SDOperand OpNode = DAG.getNode(Opc, VT, N1.getOperand(0), N0);
+      AddToWorkList(OpNode.Val);
+      return DAG.getNode(Opc, VT, OpNode, N1.getOperand(1));
+    }
+  }
+  return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+//  Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(bool RunningAfterLegalize) {
+  // Set the instance variable so that the various visit routines can use it.
+  AfterLegalize = RunningAfterLegalize;
+
+  // Add all the dag nodes to the worklist.
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I)
+    WorkList.push_back(I);
+  
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+  
+  // The root of the dag may dangle to deleted nodes until the dag combiner is
+  // done.  Set it to null to avoid confusion.
+  DAG.setRoot(SDOperand());
+  
+  /// DagCombineInfo - Expose the DAG combiner to the target combiner impls.
+  TargetLowering::DAGCombinerInfo 
+    DagCombineInfo(DAG, !RunningAfterLegalize, false, this);
+
+  // While the worklist isn't empty, inspect the node at the end of it and
+  // try to combine it.
+  while (!WorkList.empty()) {
+    SDNode *N = WorkList.back();
+    WorkList.pop_back();
+    
+    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
+    // N is deleted from the DAG, since they too may now be dead or may have a
+    // reduced number of uses, allowing other xforms.
+    if (N->use_empty() && N != &Dummy) {
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        AddToWorkList(N->getOperand(i).Val);
+      
+      DAG.DeleteNode(N);
+      continue;
+    }
+    
+    SDOperand RV = visit(N);
+    
+    // If nothing happened, try a target-specific DAG combine.
+    if (RV.Val == 0) {
+      assert(N->getOpcode() != ISD::DELETED_NODE &&
+             "Node was deleted but visit returned NULL!");
+      if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+          TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode()))
+        RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+    }
+    
+    if (RV.Val) {
+      ++NodesCombined;
+      // If we get back the same node we passed in, rather than a new node or
+      // zero, we know that the node must have defined multiple values and
+      // CombineTo was used.  Since CombineTo takes care of the worklist 
+      // mechanics for us, we have no work to do in this case.
+      if (RV.Val != N) {
+        assert(N->getOpcode() != ISD::DELETED_NODE &&
+               RV.Val->getOpcode() != ISD::DELETED_NODE &&
+               "Node was deleted but visit returned new node!");
+
+        DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG));
+        DOUT << "\nWith: "; DEBUG(RV.Val->dump(&DAG));
+        DOUT << '\n';
+        std::vector<SDNode*> NowDead;
+        if (N->getNumValues() == RV.Val->getNumValues())
+          DAG.ReplaceAllUsesWith(N, RV.Val, &NowDead);
+        else {
+          assert(N->getValueType(0) == RV.getValueType() && "Type mismatch");
+          SDOperand OpV = RV;
+          DAG.ReplaceAllUsesWith(N, &OpV, &NowDead);
+        }
+          
+        // Push the new node and any users onto the worklist
+        AddToWorkList(RV.Val);
+        AddUsersToWorkList(RV.Val);
+          
+        // Nodes can be reintroduced into the worklist.  Make sure we do not
+        // process a node that has been replaced.
+        removeFromWorkList(N);
+        for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+          removeFromWorkList(NowDead[i]);
+        
+        // Finally, since the node is now dead, remove it from the graph.
+        DAG.DeleteNode(N);
+      }
+    }
+  }
+  
+  // If the root changed (e.g. it was a dead load), update the root.
+  DAG.setRoot(Dummy.getValue());
+}
+
+SDOperand DAGCombiner::visit(SDNode *N) {
+  switch(N->getOpcode()) {
+  default: break;
+  case ISD::TokenFactor:        return visitTokenFactor(N);
+  case ISD::ADD:                return visitADD(N);
+  case ISD::SUB:                return visitSUB(N);
+  case ISD::ADDC:               return visitADDC(N);
+  case ISD::ADDE:               return visitADDE(N);
+  case ISD::MUL:                return visitMUL(N);
+  case ISD::SDIV:               return visitSDIV(N);
+  case ISD::UDIV:               return visitUDIV(N);
+  case ISD::SREM:               return visitSREM(N);
+  case ISD::UREM:               return visitUREM(N);
+  case ISD::MULHU:              return visitMULHU(N);
+  case ISD::MULHS:              return visitMULHS(N);
+  case ISD::AND:                return visitAND(N);
+  case ISD::OR:                 return visitOR(N);
+  case ISD::XOR:                return visitXOR(N);
+  case ISD::SHL:                return visitSHL(N);
+  case ISD::SRA:                return visitSRA(N);
+  case ISD::SRL:                return visitSRL(N);
+  case ISD::CTLZ:               return visitCTLZ(N);
+  case ISD::CTTZ:               return visitCTTZ(N);
+  case ISD::CTPOP:              return visitCTPOP(N);
+  case ISD::SELECT:             return visitSELECT(N);
+  case ISD::SELECT_CC:          return visitSELECT_CC(N);
+  case ISD::SETCC:              return visitSETCC(N);
+  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
+  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
+  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
+  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
+  case ISD::TRUNCATE:           return visitTRUNCATE(N);
+  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N);
+  case ISD::FADD:               return visitFADD(N);
+  case ISD::FSUB:               return visitFSUB(N);
+  case ISD::FMUL:               return visitFMUL(N);
+  case ISD::FDIV:               return visitFDIV(N);
+  case ISD::FREM:               return visitFREM(N);
+  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
+  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
+  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
+  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
+  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
+  case ISD::FP_ROUND:           return visitFP_ROUND(N);
+  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
+  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
+  case ISD::FNEG:               return visitFNEG(N);
+  case ISD::FABS:               return visitFABS(N);
+  case ISD::BRCOND:             return visitBRCOND(N);
+  case ISD::BR_CC:              return visitBR_CC(N);
+  case ISD::LOAD:               return visitLOAD(N);
+  case ISD::STORE:              return visitSTORE(N);
+  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
+  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
+  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
+  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
+  }
+  return SDOperand();
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null SDOperand.
+static SDOperand getInputChainForNode(SDNode *N) {
+  if (unsigned NumOps = N->getNumOperands()) {
+    if (N->getOperand(0).getValueType() == MVT::Other)
+      return N->getOperand(0);
+    else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+      return N->getOperand(NumOps-1);
+    for (unsigned i = 1; i < NumOps-1; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other)
+        return N->getOperand(i);
+  }
+  return SDOperand(0, 0);
+}
+
+SDOperand DAGCombiner::visitTokenFactor(SDNode *N) {
+  // If N has two operands, where one has an input chain equal to the other,
+  // the 'other' chain is redundant.
+  if (N->getNumOperands() == 2) {
+    if (getInputChainForNode(N->getOperand(0).Val) == N->getOperand(1))
+      return N->getOperand(0);
+    if (getInputChainForNode(N->getOperand(1).Val) == N->getOperand(0))
+      return N->getOperand(1);
+  }
+  
+  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
+  SmallVector<SDOperand, 8> Ops;    // Ops for replacing token factor.
+  SmallPtrSet<SDNode*, 16> SeenOps; 
+  bool Changed = false;             // If we should replace this token factor.
+  
+  // Start out with this token factor.
+  TFs.push_back(N);
+  
+  // Iterate through token factors.  The TFs list grows as new token factors
+  // are encountered.
+  for (unsigned i = 0; i < TFs.size(); ++i) {
+    SDNode *TF = TFs[i];
+    
+    // Check each of the operands.
+    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+      SDOperand Op = TF->getOperand(i);
+      
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list. They are
+        // redundant.
+        Changed = true;
+        break;
+        
+      case ISD::TokenFactor:
+        if ((CombinerAA || Op.hasOneUse()) &&
+            std::find(TFs.begin(), TFs.end(), Op.Val) == TFs.end()) {
+          // Queue up for processing.
+          TFs.push_back(Op.Val);
+          // Clean up in case the token factor is removed.
+          AddToWorkList(Op.Val);
+          Changed = true;
+          break;
+        }
+        // Fall through.
+        
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.Val))
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+
+  SDOperand Result;
+
+  // If we've changed things around, replace the token factor.
+  if (Changed) {
+    if (Ops.size() == 0) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      // New and improved token factor.
+      Result = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0], Ops.size());
+    }
+    
+    // Don't add users to work list.
+    return CombineTo(N, Result, false);
+  }
+  
+  return Result;
+}
+
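+// combineShlAddConstant - Distribute a shl over an inner add-of-constant so
+// the two constants can later be folded together: (add (shl (add x, 3), 2), y)
+// becomes (add (add (shl x, 2), 12), y).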
+static
+SDOperand combineShlAddConstant(SDOperand N0, SDOperand N1, SelectionDAG &DAG) {
+  MVT::ValueType VT = N0.getValueType();
+  SDOperand N00 = N0.getOperand(0);
+  SDOperand N01 = N0.getOperand(1);
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+  if (N01C && N00.getOpcode() == ISD::ADD && N00.Val->hasOneUse() &&
+      isa<ConstantSDNode>(N00.getOperand(1))) {
+    N0 = DAG.getNode(ISD::ADD, VT,
+                     DAG.getNode(ISD::SHL, VT, N00.getOperand(0), N01),
+                     DAG.getNode(ISD::SHL, VT, N00.getOperand(1), N01));
+    return DAG.getNode(ISD::ADD, VT, N0, N1);
+  }
+  return SDOperand();
+}
+
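+// combineSelectAndUse - Hoist a select above an add/sub when one select arm
+// is zero: (add x, (select cc, 0, c)) becomes (select cc, x, (add x, c)),
+// since the add is a no-op when the select picks 0.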
+static
+SDOperand combineSelectAndUse(SDNode *N, SDOperand Slct, SDOperand OtherOp,
+                              SelectionDAG &DAG) {
+  MVT::ValueType VT = N->getValueType(0);
+  unsigned Opc = N->getOpcode();
+  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+  SDOperand LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+  SDOperand RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+  ISD::CondCode CC = ISD::SETCC_INVALID;
+  if (isSlctCC)
+    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+  else {
+    SDOperand CCOp = Slct.getOperand(0);
+    if (CCOp.getOpcode() == ISD::SETCC)
+      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+  }
+
+  bool DoXform = false;
+  bool InvCC = false;
+  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+          "Bad input!");
+  if (LHS.getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(LHS)->isNullValue())
+    DoXform = true;
+  else if (CC != ISD::SETCC_INVALID &&
+           RHS.getOpcode() == ISD::Constant &&
+           cast<ConstantSDNode>(RHS)->isNullValue()) {
+    std::swap(LHS, RHS);
+    bool isInt = MVT::isInteger(isSlctCC ? Slct.getOperand(0).getValueType()
+                                : Slct.getOperand(0).getOperand(0).getValueType());
+    CC = ISD::getSetCCInverse(CC, isInt);
+    DoXform = true;
+    InvCC = true;
+  }
+
+  if (DoXform) {
+    SDOperand Result = DAG.getNode(Opc, VT, OtherOp, RHS);
+    if (isSlctCC)
+      return DAG.getSelectCC(OtherOp, Result,
+                             Slct.getOperand(0), Slct.getOperand(1), CC);
+    SDOperand CCOp = Slct.getOperand(0);
+    if (InvCC)
+      CCOp = DAG.getSetCC(CCOp.getValueType(), CCOp.getOperand(0),
+                          CCOp.getOperand(1), CC);
+    return DAG.getNode(ISD::SELECT, VT, CCOp, OtherOp, Result);
+  }
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitADD(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (add x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (add c1, c2) -> c1+c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::ADD, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADD, VT, N1, N0);
+  // fold (add x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold ((c1-A)+c2) -> (c1+c2)-A
+  if (N1C && N0.getOpcode() == ISD::SUB)
+    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+      return DAG.getNode(ISD::SUB, VT,
+                         DAG.getConstant(N1C->getValue()+N0C->getValue(), VT),
+                         N0.getOperand(1));
+  // reassociate add
+  SDOperand RADD = ReassociateOps(ISD::ADD, N0, N1);
+  if (RADD.Val != 0)
+    return RADD;
+  // fold ((0-A) + B) -> B-A
+  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, VT, N1, N0.getOperand(1));
+  // fold (A + (0-B)) -> A-B
+  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, VT, N0, N1.getOperand(1));
+  // fold (A+(B-A)) -> B
+  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+    return N1.getOperand(0);
+
+  if (!MVT::isVector(VT) && SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  // fold (a+b) -> (a|b) iff a and b share no bits.
+  if (MVT::isInteger(VT) && !MVT::isVector(VT)) {
+    uint64_t LHSZero, LHSOne;
+    uint64_t RHSZero, RHSOne;
+    uint64_t Mask = MVT::getIntVTBitMask(VT);
+    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+    if (LHSZero) {
+      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+      
+      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+      if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+        return DAG.getNode(ISD::OR, VT, N0, N1);
+    }
+  }
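+  // For example, (add (and a, 0xF0), (and b, 0x0F)) can be emitted as an
+  // OR: the operands share no set bits, so no carries can occur.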
+
+  // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+  if (N0.getOpcode() == ISD::SHL && N0.Val->hasOneUse()) {
+    SDOperand Result = combineShlAddConstant(N0, N1, DAG);
+    if (Result.Val) return Result;
+  }
+  if (N1.getOpcode() == ISD::SHL && N1.Val->hasOneUse()) {
+    SDOperand Result = combineShlAddConstant(N1, N0, DAG);
+    if (Result.Val) return Result;
+  }
+
+  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
+  if (N0.getOpcode() == ISD::SELECT && N0.Val->hasOneUse()) {
+    SDOperand Result = combineSelectAndUse(N, N0, N1, DAG);
+    if (Result.Val) return Result;
+  }
+  if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) {
+    SDOperand Result = combineSelectAndUse(N, N1, N0, DAG);
+    if (Result.Val) return Result;
+  }
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitADDC(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // If the flag result is dead, turn this into an ADD.
+  if (N->hasNUsesOfValue(0, 1))
+    return CombineTo(N, DAG.getNode(ISD::ADD, VT, N1, N0),
+                     DAG.getNode(ISD::CARRY_FALSE, MVT::Flag));
+  
+  // canonicalize constant to RHS.
+  if (N0C && !N1C) {
+    SDOperand Ops[] = { N1, N0 };
+    return DAG.getNode(ISD::ADDC, N->getVTList(), Ops, 2);
+  }
+  
+  // fold (addc x, 0) -> x + no carry out
+  if (N1C && N1C->isNullValue())
+    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, MVT::Flag));
+  
+  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+  uint64_t LHSZero, LHSOne;
+  uint64_t RHSZero, RHSOne;
+  uint64_t Mask = MVT::getIntVTBitMask(VT);
+  DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+  if (LHSZero) {
+    DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+    
+    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+    if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+        (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+      return CombineTo(N, DAG.getNode(ISD::OR, VT, N0, N1),
+                       DAG.getNode(ISD::CARRY_FALSE, MVT::Flag));
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitADDE(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand CarryIn = N->getOperand(2);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  //MVT::ValueType VT = N0.getValueType();
+  
+  // canonicalize constant to RHS
+  if (N0C && !N1C) {
+    SDOperand Ops[] = { N1, N0, CarryIn };
+    return DAG.getNode(ISD::ADDE, N->getVTList(), Ops, 3);
+  }
+  
+  // fold (adde x, y, false) -> (addc x, y)
+  if (CarryIn.getOpcode() == ISD::CARRY_FALSE) {
+    SDOperand Ops[] = { N1, N0 };
+    return DAG.getNode(ISD::ADDC, N->getVTList(), Ops, 2);
+  }
+  
+  return SDOperand();
+}
+
+
+
+SDOperand DAGCombiner::visitSUB(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (sub x, x) -> 0
+  if (N0 == N1)
+    return DAG.getConstant(0, N->getValueType(0));
+  // fold (sub c1, c2) -> c1-c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::SUB, VT, N0, N1);
+  // fold (sub x, c) -> (add x, -c)
+  if (N1C)
+    return DAG.getNode(ISD::ADD, VT, N0, DAG.getConstant(-N1C->getValue(), VT));
+  // fold (A+B)-A -> B
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+    return N0.getOperand(1);
+  // fold (A+B)-B -> A
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+    return N0.getOperand(0);
+  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
+  if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) {
+    SDOperand Result = combineSelectAndUse(N, N1, N0, DAG);
+    if (Result.Val) return Result;
+  }
+  // If either operand of a sub is undef, the result is undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitMUL(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (mul x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (mul c1, c2) -> c1*c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::MUL, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::MUL, VT, N1, N0);
+  // fold (mul x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mul x, -1) -> 0-x
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
+  // fold (mul x, (1 << c)) -> x << c
+  if (N1C && isPowerOf2_64(N1C->getValue()))
+    return DAG.getNode(ISD::SHL, VT, N0,
+                       DAG.getConstant(Log2_64(N1C->getValue()),
+                                       TLI.getShiftAmountTy()));
+  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+  if (N1C && isPowerOf2_64(-N1C->getSignExtended())) {
+    // FIXME: If the input is something that is easily negated (e.g. a 
+    // single-use add), we should put the negate there.
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT),
+                       DAG.getNode(ISD::SHL, VT, N0,
+                            DAG.getConstant(Log2_64(-N1C->getSignExtended()),
+                                            TLI.getShiftAmountTy())));
+  }
+
+  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+  if (N1C && N0.getOpcode() == ISD::SHL && 
+      isa<ConstantSDNode>(N0.getOperand(1))) {
+    SDOperand C3 = DAG.getNode(ISD::SHL, VT, N1, N0.getOperand(1));
+    AddToWorkList(C3.Val);
+    return DAG.getNode(ISD::MUL, VT, N0.getOperand(0), C3);
+  }
+  
+  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+  // use.
+  {
+    SDOperand Sh(0,0), Y(0,0);
+    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
+    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+        N0.Val->hasOneUse()) {
+      Sh = N0; Y = N1;
+    } else if (N1.getOpcode() == ISD::SHL && 
+               isa<ConstantSDNode>(N1.getOperand(1)) && N1.Val->hasOneUse()) {
+      Sh = N1; Y = N0;
+    }
+    if (Sh.Val) {
+      SDOperand Mul = DAG.getNode(ISD::MUL, VT, Sh.getOperand(0), Y);
+      return DAG.getNode(ISD::SHL, VT, Mul, Sh.getOperand(1));
+    }
+  }
+  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+  if (N1C && N0.getOpcode() == ISD::ADD && N0.Val->hasOneUse() && 
+      isa<ConstantSDNode>(N0.getOperand(1))) {
+    return DAG.getNode(ISD::ADD, VT, 
+                       DAG.getNode(ISD::MUL, VT, N0.getOperand(0), N1),
+                       DAG.getNode(ISD::MUL, VT, N0.getOperand(1), N1));
+  }
+  
+  // reassociate mul
+  SDOperand RMUL = ReassociateOps(ISD::MUL, N0, N1);
+  if (RMUL.Val != 0)
+    return RMUL;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSDIV(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (sdiv c1, c2) -> c1/c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.getNode(ISD::SDIV, VT, N0, N1);
+  // fold (sdiv X, 1) -> X
+  if (N1C && N1C->getSignExtended() == 1LL)
+    return N0;
+  // fold (sdiv X, -1) -> 0-X
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
+  // If we know the sign bits of both operands are zero, strength reduce to a
+  // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
+  uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
+  if (DAG.MaskedValueIsZero(N1, SignBit) &&
+      DAG.MaskedValueIsZero(N0, SignBit))
+    return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1);
+  // fold (sdiv X, pow2) -> simple ops after legalize
+  if (N1C && N1C->getValue() && !TLI.isIntDivCheap() &&
+      (isPowerOf2_64(N1C->getSignExtended()) || 
+       isPowerOf2_64(-N1C->getSignExtended()))) {
+    // If dividing by powers of two is cheap, then don't perform the following
+    // fold.
+    if (TLI.isPow2DivCheap())
+      return SDOperand();
+    int64_t pow2 = N1C->getSignExtended();
+    int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+    unsigned lg2 = Log2_64(abs2);
+    // Splat the sign bit into the register
+    SDOperand SGN = DAG.getNode(ISD::SRA, VT, N0,
+                                DAG.getConstant(MVT::getSizeInBits(VT)-1,
+                                                TLI.getShiftAmountTy()));
+    AddToWorkList(SGN.Val);
+    // Add (N0 < 0) ? abs2 - 1 : 0;
+    SDOperand SRL = DAG.getNode(ISD::SRL, VT, SGN,
+                                DAG.getConstant(MVT::getSizeInBits(VT)-lg2,
+                                                TLI.getShiftAmountTy()));
+    SDOperand ADD = DAG.getNode(ISD::ADD, VT, N0, SRL);
+    AddToWorkList(SRL.Val);
+    AddToWorkList(ADD.Val);    // Divide by pow2
+    SDOperand SRA = DAG.getNode(ISD::SRA, VT, ADD,
+                                DAG.getConstant(lg2, TLI.getShiftAmountTy()));
+    // If we're dividing by a positive value, we're done.  Otherwise, we must
+    // negate the result.
+    if (pow2 > 0)
+      return SRA;
+    AddToWorkList(SRA.Val);
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA);
+  }
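+  // For example, a 32-bit X /s 4 emits: SGN = sra X, 31; SRL = srl SGN, 30
+  // (yielding 0 or 3); ADD = add X, SRL; SRA = sra ADD, 2.  The bias makes
+  // the arithmetic shift round toward zero, matching sdiv semantics.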
+  // if integer divide is expensive and we satisfy the requirements, emit an
+  // alternate sequence.
+  if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && 
+      !TLI.isIntDivCheap()) {
+    SDOperand Op = BuildSDIV(N);
+    if (Op.Val) return Op;
+  }
+
+  // undef / X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X / undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitUDIV(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (udiv c1, c2) -> c1/c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.getNode(ISD::UDIV, VT, N0, N1);
+  // fold (udiv x, (1 << c)) -> x >>u c
+  if (N1C && isPowerOf2_64(N1C->getValue()))
+    return DAG.getNode(ISD::SRL, VT, N0, 
+                       DAG.getConstant(Log2_64(N1C->getValue()),
+                                       TLI.getShiftAmountTy()));
+  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+  if (N1.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+      if (isPowerOf2_64(SHC->getValue())) {
+        MVT::ValueType ADDVT = N1.getOperand(1).getValueType();
+        SDOperand Add = DAG.getNode(ISD::ADD, ADDVT, N1.getOperand(1),
+                                    DAG.getConstant(Log2_64(SHC->getValue()),
+                                                    ADDVT));
+        AddToWorkList(Add.Val);
+        return DAG.getNode(ISD::SRL, VT, N0, Add);
+      }
+    }
+  }
+  // fold (udiv x, c) -> alternate
+  if (N1C && N1C->getValue() && !TLI.isIntDivCheap()) {
+    SDOperand Op = BuildUDIV(N);
+    if (Op.Val) return Op;
+  }
+
+  // undef / X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X / undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSREM(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (srem c1, c2) -> c1%c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.getNode(ISD::SREM, VT, N0, N1);
+  // If we know the sign bits of both operands are zero, strength reduce to a
+  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+  uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
+  if (DAG.MaskedValueIsZero(N1, SignBit) &&
+      DAG.MaskedValueIsZero(N0, SignBit))
+    return DAG.getNode(ISD::UREM, VT, N0, N1);
+  
+  // Unconditionally lower X%C -> X-X/C*C.  This allows the X/C logic to hack on
+  // the remainder operation.
+  if (N1C && !N1C->isNullValue()) {
+    SDOperand Div = DAG.getNode(ISD::SDIV, VT, N0, N1);
+    SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1);
+    SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul);
+    AddToWorkList(Div.Val);
+    AddToWorkList(Mul.Val);
+    return Sub;
+  }
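+  // For example, (srem x, 7) is lowered to (sub x, (mul (sdiv x, 7), 7)),
+  // so any strength reduction of the sdiv also benefits the remainder.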
+  
+  // undef % X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X % undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitUREM(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (urem c1, c2) -> c1%c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.getNode(ISD::UREM, VT, N0, N1);
+  // fold (urem x, pow2) -> (and x, pow2-1)
+  if (N1C && !N1C->isNullValue() && isPowerOf2_64(N1C->getValue()))
+    return DAG.getNode(ISD::AND, VT, N0, DAG.getConstant(N1C->getValue()-1,VT));
+  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+  if (N1.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+      if (isPowerOf2_64(SHC->getValue())) {
+        SDOperand Add = DAG.getNode(ISD::ADD, VT, N1,DAG.getConstant(~0ULL,VT));
+        AddToWorkList(Add.Val);
+        return DAG.getNode(ISD::AND, VT, N0, Add);
+      }
+    }
+  }
+  
+  // Unconditionally lower X%C -> X-X/C*C.  This allows the X/C logic to hack on
+  // the remainder operation.
+  if (N1C && !N1C->isNullValue()) {
+    SDOperand Div = DAG.getNode(ISD::UDIV, VT, N0, N1);
+    SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1);
+    SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul);
+    AddToWorkList(Div.Val);
+    AddToWorkList(Mul.Val);
+    return Sub;
+  }
+  
+  // undef % X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X % undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitMULHS(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (mulhs x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhs x, 1) -> (sra x, size(x)-1)
+  if (N1C && N1C->getValue() == 1)
+    return DAG.getNode(ISD::SRA, N0.getValueType(), N0, 
+                       DAG.getConstant(MVT::getSizeInBits(N0.getValueType())-1,
+                                       TLI.getShiftAmountTy()));
+  // fold (mulhs x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitMULHU(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (mulhu x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhu x, 1) -> 0
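+  // (the high half of the unsigned product x*1 is always zero)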
+  if (N1C && N1C->getValue() == 1)
+    return DAG.getConstant(0, N0.getValueType());
+  // fold (mulhu x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  return SDOperand();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDOperand DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+  SDOperand N0 = N->getOperand(0), N1 = N->getOperand(1);
+  MVT::ValueType VT = N0.getValueType();
+  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+  
+  // For each of OP in AND/OR/XOR:
+  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
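+  // This is only legal when both inner operands share a source type, which is
+  // checked below.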
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+       N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
+      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+    SDOperand ORNode = DAG.getNode(N->getOpcode(), 
+                                   N0.getOperand(0).getValueType(),
+                                   N0.getOperand(0), N1.getOperand(0));
+    AddToWorkList(ORNode.Val);
+    return DAG.getNode(N0.getOpcode(), VT, ORNode);
+  }
+  
+  // For each of OP in SHL/SRL/SRA/AND...
+  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
+  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+      N0.getOperand(1) == N1.getOperand(1)) {
+    SDOperand ORNode = DAG.getNode(N->getOpcode(),
+                                   N0.getOperand(0).getValueType(),
+                                   N0.getOperand(0), N1.getOperand(0));
+    AddToWorkList(ORNode.Val);
+    return DAG.getNode(N0.getOpcode(), VT, ORNode, N0.getOperand(1));
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitAND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand LL, LR, RL, RR, CC0, CC1;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N1.getValueType();
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (and x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (and c1, c2) -> c1&c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::AND, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::AND, VT, N1, N0);
+  // fold (and x, -1) -> x
+  if (N1C && N1C->isAllOnesValue())
+    return N0;
+  // if (and x, c) is known to be zero, return 0
+  if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
+    return DAG.getConstant(0, VT);
+  // reassociate and
+  SDOperand RAND = ReassociateOps(ISD::AND, N0, N1);
+  if (RAND.Val != 0)
+    return RAND;
+  // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+  if (N1C && N0.getOpcode() == ISD::OR)
+    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+      if ((ORI->getValue() & N1C->getValue()) == N1C->getValue())
+        return N1;
+  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
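+  // (every bit the mask clears is already known zero in the source, so the
+  // zero_extend alone computes the same value and the AND is redundant)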
+  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+    unsigned InMask = MVT::getIntVTBitMask(N0.getOperand(0).getValueType());
+    if (DAG.MaskedValueIsZero(N0.getOperand(0),
+                              ~N1C->getValue() & InMask)) {
+      SDOperand Zext = DAG.getNode(ISD::ZERO_EXTEND, N0.getValueType(),
+                                   N0.getOperand(0));
+      
+      // Replace uses of the AND with uses of the Zero extend node.
+      CombineTo(N, Zext);
+      
+      // We actually want to replace all uses of the any_extend with the
+      // zero_extend, to avoid duplicating things.  This will later cause this
+      // AND to be folded.
+      CombineTo(N0.Val, Zext);
+      return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+    
+    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+        MVT::isInteger(LL.getValueType())) {
+      // fold (X == 0) & (Y == 0) -> (X|Y == 0)
+      if (cast<ConstantSDNode>(LR)->getValue() == 0 && Op1 == ISD::SETEQ) {
+        SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.Val);
+        return DAG.getSetCC(VT, ORNode, LR, Op1);
+      }
+      // fold (X == -1) & (Y == -1) -> (X&Y == -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+        SDOperand ANDNode = DAG.getNode(ISD::AND, LR.getValueType(), LL, RL);
+        AddToWorkList(ANDNode.Val);
+        return DAG.getSetCC(VT, ANDNode, LR, Op1);
+      }
+      // fold (X >  -1) & (Y >  -1) -> (X|Y > -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+        SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.Val);
+        return DAG.getSetCC(VT, ORNode, LR, Op1);
+      }
+    }
+    // canonicalize equivalent to ll == rl
+    if (LL == RR && LR == RL) {
+      Op1 = ISD::getSetCCSwappedOperands(Op1);
+      std::swap(RL, RR);
+    }
+    if (LL == RL && LR == RR) {
+      bool isInteger = MVT::isInteger(LL.getValueType());
+      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+      if (Result != ISD::SETCC_INVALID)
+        return DAG.getSetCC(N0.getValueType(), LL, LR, Result);
+    }
+  }
+
+  // Simplify: and (op x...), (op y...)  -> (op (and x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.Val) return Tmp;
+  }
+  
+  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+  // fold (and (sra)) -> (and (srl)) when possible.
+  if (!MVT::isVector(VT) &&
+      SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  // fold (zext_inreg (extload x)) -> (zextload x)
+  if (ISD::isEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val)) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    // If we zero all the possible extended bits, then we can turn this into
+    // a zextload if we are running before legalize or the operation is legal.
+    if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
+        (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
+      SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+                                         LN0->getBasePtr(), LN0->getSrcValue(),
+                                         LN0->getSrcValueOffset(), EVT,
+                                         LN0->isVolatile(), 
+                                         LN0->getAlignment());
+      AddToWorkList(N);
+      CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+      return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+  if (ISD::isSEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) &&
+      N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    // If we zero all the possible extended bits, then we can turn this into
+    // a zextload if we are running before legalize or the operation is legal.
+    if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
+        (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
+      SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+                                         LN0->getBasePtr(), LN0->getSrcValue(),
+                                         LN0->getSrcValueOffset(), EVT,
+                                         LN0->isVolatile(), 
+                                         LN0->getAlignment());
+      AddToWorkList(N);
+      CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+      return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  
+  // fold (and (load x), 255) -> (zextload x, i8)
+  // fold (and (extload x, i16), 255) -> (zextload x, i8)
+  if (N1C && N0.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+        LN0->getAddressingMode() == ISD::UNINDEXED &&
+        N0.hasOneUse()) {
+      MVT::ValueType EVT, LoadedVT;
+      if (N1C->getValue() == 255)
+        EVT = MVT::i8;
+      else if (N1C->getValue() == 65535)
+        EVT = MVT::i16;
+      else if (N1C->getValue() == ~0U)
+        EVT = MVT::i32;
+      else
+        EVT = MVT::Other;
+    
+      LoadedVT = LN0->getLoadedVT();
+      if (EVT != MVT::Other && LoadedVT > EVT &&
+          (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
+        MVT::ValueType PtrType = N0.getOperand(1).getValueType();
+        // For big endian targets, we need to add an offset to the pointer to
+        // load the correct bytes.  For little endian systems, we merely need to
+        // read fewer bytes from the same pointer.
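+        // e.g. the low byte of a big-endian i32 load lives at byte offset
+        // (32-8)/8 == 3.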
+        unsigned PtrOff =
+          (MVT::getSizeInBits(LoadedVT) - MVT::getSizeInBits(EVT)) / 8;
+        SDOperand NewPtr = LN0->getBasePtr();
+        if (!TLI.isLittleEndian())
+          NewPtr = DAG.getNode(ISD::ADD, PtrType, NewPtr,
+                               DAG.getConstant(PtrOff, PtrType));
+        AddToWorkList(NewPtr.Val);
+        SDOperand Load =
+          DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), NewPtr,
+                         LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT,
+                         LN0->isVolatile(), LN0->getAlignment());
+        AddToWorkList(N);
+        CombineTo(N0.Val, Load, Load.getValue(1));
+        return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    }
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitOR(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand LL, LR, RL, RR, CC0, CC1;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N1.getValueType();
+  unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (or x, undef) -> -1
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(~0ULL, VT);
+  // fold (or c1, c2) -> c1|c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::OR, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::OR, VT, N1, N0);
+  // fold (or x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (or x, -1) -> -1
+  if (N1C && N1C->isAllOnesValue())
+    return N1;
+  // fold (or x, c) -> c iff (x & ~c) == 0
+  if (N1C && 
+      DAG.MaskedValueIsZero(N0,~N1C->getValue() & (~0ULL>>(64-OpSizeInBits))))
+    return N1;
+  // reassociate or
+  SDOperand ROR = ReassociateOps(ISD::OR, N0, N1);
+  if (ROR.Val != 0)
+    return ROR;
+  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+  if (N1C && N0.getOpcode() == ISD::AND && N0.Val->hasOneUse() &&
+             isa<ConstantSDNode>(N0.getOperand(1))) {
+    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+    return DAG.getNode(ISD::AND, VT, DAG.getNode(ISD::OR, VT, N0.getOperand(0),
+                                                 N1),
+                       DAG.getConstant(N1C->getValue() | C1->getValue(), VT));
+  }
+  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+    
+    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+        MVT::isInteger(LL.getValueType())) {
+      // fold (X != 0) | (Y != 0) -> (X|Y != 0)
+      // fold (X <  0) | (Y <  0) -> (X|Y < 0)
+      if (cast<ConstantSDNode>(LR)->getValue() == 0 && 
+          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+        SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.Val);
+        return DAG.getSetCC(VT, ORNode, LR, Op1);
+      }
+      // fold (X != -1) | (Y != -1) -> (X&Y != -1)
+      // fold (X >  -1) | (Y >  -1) -> (X&Y >  -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && 
+          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+        SDOperand ANDNode = DAG.getNode(ISD::AND, LR.getValueType(), LL, RL);
+        AddToWorkList(ANDNode.Val);
+        return DAG.getSetCC(VT, ANDNode, LR, Op1);
+      }
+    }
+    // canonicalize equivalent to ll == rl
+    if (LL == RR && LR == RL) {
+      Op1 = ISD::getSetCCSwappedOperands(Op1);
+      std::swap(RL, RR);
+    }
+    if (LL == RL && LR == RR) {
+      bool isInteger = MVT::isInteger(LL.getValueType());
+      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+      if (Result != ISD::SETCC_INVALID)
+        return DAG.getSetCC(N0.getValueType(), LL, LR, Result);
+    }
+  }
+  
+  // Simplify: or (op x...), (op y...)  -> (op (or x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.Val) return Tmp;
+  }
+  
+  // (X & C1) | (Y & C2)  -> (X|Y) & C3  if possible.
+  if (N0.getOpcode() == ISD::AND &&
+      N1.getOpcode() == ISD::AND &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      N1.getOperand(1).getOpcode() == ISD::Constant &&
+      // Don't increase # computations.
+      (N0.Val->hasOneUse() || N1.Val->hasOneUse())) {
+    // We can only do this xform if we know that bits from X that are set in C2
+    // but not in C1 are already zero.  Likewise for Y.
+    uint64_t LHSMask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    uint64_t RHSMask = cast<ConstantSDNode>(N1.getOperand(1))->getValue();
+    
+    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+      SDOperand X =DAG.getNode(ISD::OR, VT, N0.getOperand(0), N1.getOperand(0));
+      return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(LHSMask|RHSMask, VT));
+    }
+  }
+  
+  // See if this is some rotate idiom.
+  if (SDNode *Rot = MatchRotate(N0, N1))
+    return SDOperand(Rot, 0);
+
+  return SDOperand();
+}
+
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDOperand Op, SDOperand &Shift, SDOperand &Mask) {
+  if (Op.getOpcode() == ISD::AND) {
+    if (isa<ConstantSDNode>(Op.getOperand(1))) {
+      Mask = Op.getOperand(1);
+      Op = Op.getOperand(0);
+    } else {
+      return false;
+    }
+  }
+  
+  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+    Shift = Op;
+    return true;
+  }
+  return false;  
+}
+
+
+// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDOperand LHS, SDOperand RHS) {
+  // Must be a legal type.  Expanded and promoted things won't work with rotates.
+  MVT::ValueType VT = LHS.getValueType();
+  if (!TLI.isTypeLegal(VT)) return 0;
+
+  // The target must have at least one rotate flavor.
+  bool HasROTL = TLI.isOperationLegal(ISD::ROTL, VT);
+  bool HasROTR = TLI.isOperationLegal(ISD::ROTR, VT);
+  if (!HasROTL && !HasROTR) return 0;
+  
+  // Match "(X shl/srl V1) & V2" where V2 may not be present.
+  SDOperand LHSShift;   // The shift.
+  SDOperand LHSMask;    // AND value if any.
+  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+    return 0; // Not part of a rotate.
+
+  SDOperand RHSShift;   // The shift.
+  SDOperand RHSMask;    // AND value if any.
+  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+    return 0; // Not part of a rotate.
+  
+  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+    return 0;   // Not shifting the same value.
+
+  if (LHSShift.getOpcode() == RHSShift.getOpcode())
+    return 0;   // Shifts must disagree.
+    
+  // Canonicalize shl to left side in a shl/srl pair.
+  if (RHSShift.getOpcode() == ISD::SHL) {
+    std::swap(LHS, RHS);
+    std::swap(LHSShift, RHSShift);
+    std::swap(LHSMask , RHSMask );
+  }
+
+  unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+  SDOperand LHSShiftArg = LHSShift.getOperand(0);
+  SDOperand LHSShiftAmt = LHSShift.getOperand(1);
+  SDOperand RHSShiftAmt = RHSShift.getOperand(1);
+
+  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
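+  // e.g. for i32, (or (shl x, 8), (srl x, 24)) is (rotl x, 8), equivalently
+  // (rotr x, 24), because 8+24 == 32.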
+  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+      RHSShiftAmt.getOpcode() == ISD::Constant) {
+    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getValue();
+    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getValue();
+    if ((LShVal + RShVal) != OpSizeInBits)
+      return 0;
+
+    SDOperand Rot;
+    if (HasROTL)
+      Rot = DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt);
+    else
+      Rot = DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt);
+    
+    // If there is an AND of either shifted operand, apply it to the result.
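+    // Positions supplied by the other half of the rotate must survive: the
+    // shl half leaves its low LShVal bits to the srl, and the srl half leaves
+    // its high RShVal bits to the shl, so widen each mask accordingly.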
+    if (LHSMask.Val || RHSMask.Val) {
+      uint64_t Mask = MVT::getIntVTBitMask(VT);
+      
+      if (LHSMask.Val) {
+        uint64_t RHSBits = (1ULL << LShVal)-1;
+        Mask &= cast<ConstantSDNode>(LHSMask)->getValue() | RHSBits;
+      }
+      if (RHSMask.Val) {
+        uint64_t LHSBits = ~((1ULL << (OpSizeInBits-RShVal))-1);
+        Mask &= cast<ConstantSDNode>(RHSMask)->getValue() | LHSBits;
+      }
+        
+      Rot = DAG.getNode(ISD::AND, VT, Rot, DAG.getConstant(Mask, VT));
+    }
+    
+    return Rot.Val;
+  }
+  
+  // If there is a mask here, and we have a variable shift, we can't be sure
+  // that we're masking out the right stuff.
+  if (LHSMask.Val || RHSMask.Val)
+    return 0;
+  
+  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+    if (ConstantSDNode *SUBC = 
+          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+      if (SUBC->getValue() == OpSizeInBits) {
+        if (HasROTL)
+          return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+        else
+          return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+      }
+    }
+  }
+  
+  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
+  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
+  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+    if (ConstantSDNode *SUBC = 
+          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+      if (SUBC->getValue() == OpSizeInBits) {
+        if (HasROTL)
+          return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+        else
+          return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+      }
+    }
+  }
+
+  // Look for sign/zext/any-extended cases:
+  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND) &&
+      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND)) {
+    SDOperand LExtOp0 = LHSShiftAmt.getOperand(0);
+    SDOperand RExtOp0 = RHSShiftAmt.getOperand(0);
+    if (RExtOp0.getOpcode() == ISD::SUB &&
+        RExtOp0.getOperand(1) == LExtOp0) {
+      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+      //   (rotl x, y)
+      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+      //   (rotr x, (sub 32, y))
+      if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+        if (SUBC->getValue() == OpSizeInBits) {
+          if (HasROTL)
+            return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+          else
+            return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+        }
+      }
+    } else if (LExtOp0.getOpcode() == ISD::SUB &&
+               RExtOp0 == LExtOp0.getOperand(1)) {
+      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+      //   (rotr x, y)
+      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+      //   (rotl x, (sub 32, y))
+      if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+        if (SUBC->getValue() == OpSizeInBits) {
+          if (HasROTR)
+            return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+          else
+            return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+        }
+      }
+    }
+  }
+  
+  return 0;
+}
+
+
+SDOperand DAGCombiner::visitXOR(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand LHS, RHS, CC;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (xor x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (xor c1, c2) -> c1^c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::XOR, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::XOR, VT, N1, N0);
+  // fold (xor x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // reassociate xor
+  SDOperand RXOR = ReassociateOps(ISD::XOR, N0, N1);
+  if (RXOR.Val != 0)
+    return RXOR;
+  // fold !(x cc y) -> (x !cc y)
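+  // (xor'ing a setcc result with 1 is a logical not, so invert the condition
+  // code instead of emitting the xor)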
+  if (N1C && N1C->getValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+    bool isInt = MVT::isInteger(LHS.getValueType());
+    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+                                               isInt);
+    if (N0.getOpcode() == ISD::SETCC)
+      return DAG.getSetCC(VT, LHS, RHS, NotCC);
+    if (N0.getOpcode() == ISD::SELECT_CC)
+      return DAG.getSelectCC(LHS, RHS, N0.getOperand(2),N0.getOperand(3),NotCC);
+    assert(0 && "Unhandled SetCC Equivalent!");
+    abort();
+  }
+  // fold !(x or y) -> (!x and !y), !(x and y) -> (!x or !y)
+  // iff x or y is a setcc
+  if (N1C && N1C->getValue() == 1 && VT == MVT::i1 &&
+      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+    SDOperand LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, VT, LHS, N1);  // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, VT, RHS, N1);  // RHS = ~RHS
+      AddToWorkList(LHS.Val); AddToWorkList(RHS.Val);
+      return DAG.getNode(NewOpcode, VT, LHS, RHS);
+    }
+  }
+  // fold ~(x or y) -> (~x and ~y), ~(x and y) -> (~x or ~y)
+  // iff x or y is a constant
+  if (N1C && N1C->isAllOnesValue() && 
+      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+    SDOperand LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, VT, LHS, N1);  // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, VT, RHS, N1);  // RHS = ~RHS
+      AddToWorkList(LHS.Val); AddToWorkList(RHS.Val);
+      return DAG.getNode(NewOpcode, VT, LHS, RHS);
+    }
+  }
+  // fold (xor (xor x, c1), c2) -> (xor x, c1^c2)
+  if (N1C && N0.getOpcode() == ISD::XOR) {
+    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N00C)
+      return DAG.getNode(ISD::XOR, VT, N0.getOperand(1),
+                         DAG.getConstant(N1C->getValue()^N00C->getValue(), VT));
+    if (N01C)
+      return DAG.getNode(ISD::XOR, VT, N0.getOperand(0),
+                         DAG.getConstant(N1C->getValue()^N01C->getValue(), VT));
+  }
+  // fold (xor x, x) -> 0
+  if (N0 == N1) {
+    if (!MVT::isVector(VT)) {
+      return DAG.getConstant(0, VT);
+    } else if (!AfterLegalize || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+      // Produce a vector of zeros.
+      SDOperand El = DAG.getConstant(0, MVT::getVectorElementType(VT));
+      std::vector<SDOperand> Ops(MVT::getVectorNumElements(VT), El);
+      return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+    }
+  }
+  
+  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.Val) return Tmp;
+  }
+  
+  // Simplify the expression using non-local knowledge.
+  if (!MVT::isVector(VT) &&
+      SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSHL(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+  
+  // fold (shl c1, c2) -> c1<<c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::SHL, VT, N0, N1);
+  // fold (shl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (shl x, c >= size(x)) -> undef
+  if (N1C && N1C->getValue() >= OpSizeInBits)
+    return DAG.getNode(ISD::UNDEF, VT);
+  // fold (shl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // if (shl x, c) is known to be zero, return 0
+  if (DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
+    return DAG.getConstant(0, VT);
+  if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  // fold (shl (shl x, c1), c2) -> 0 or (shl x, c1+c2)
+  if (N1C && N0.getOpcode() == ISD::SHL && 
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    uint64_t c2 = N1C->getValue();
+    if (c1 + c2 >= OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SHL, VT, N0.getOperand(0), 
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+  // fold (shl (srl x, c1), c2) -> (shl (and x, -1 << c1), c2-c1) or
+  //                               (srl (and x, -1 << c1), c1-c2)
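+  // ((srl x, c1) later shifted left by c1 is x with its low c1 bits cleared,
+  // so the pair collapses to one shift of (and x, -1 << c1) by |c2-c1|)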
+  if (N1C && N0.getOpcode() == ISD::SRL && 
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    uint64_t c2 = N1C->getValue();
+    SDOperand Mask = DAG.getNode(ISD::AND, VT, N0.getOperand(0),
+                                 DAG.getConstant(~0ULL << c1, VT));
+    if (c2 > c1)
+      return DAG.getNode(ISD::SHL, VT, Mask, 
+                         DAG.getConstant(c2-c1, N1.getValueType()));
+    else
+      return DAG.getNode(ISD::SRL, VT, Mask, 
+                         DAG.getConstant(c1-c2, N1.getValueType()));
+  }
+  // fold (shl (sra x, c1), c1) -> (and x, -1 << c1)
+  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
+    return DAG.getNode(ISD::AND, VT, N0.getOperand(0),
+                       DAG.getConstant(~0ULL << N1C->getValue(), VT));
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSRA(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  
+  // fold (sra c1, c2) -> c1>>c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::SRA, VT, N0, N1);
+  // fold (sra 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (sra -1, x) -> -1
+  if (N0C && N0C->isAllOnesValue())
+    return N0;
+  // fold (sra x, c >= size(x)) -> undef
+  if (N1C && N1C->getValue() >= MVT::getSizeInBits(VT))
+    return DAG.getNode(ISD::UNDEF, VT);
+  // fold (sra x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (sra (shl x, c1), c1) -> sext_inreg if the remaining low bits form a
+  // simple integer type and the target supports sign_extend_inreg on it.
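+  // (shifting left by c1 then arithmetically right by c1 sign-extends the low
+  // size(x)-c1 bits, which is exactly sign_extend_inreg)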
+  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+    unsigned LowBits = MVT::getSizeInBits(VT) - (unsigned)N1C->getValue();
+    MVT::ValueType EVT;
+    switch (LowBits) {
+    default: EVT = MVT::Other; break;
+    case  1: EVT = MVT::i1;    break;
+    case  8: EVT = MVT::i8;    break;
+    case 16: EVT = MVT::i16;   break;
+    case 32: EVT = MVT::i32;   break;
+    }
+    if (EVT > MVT::Other && TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0.getOperand(0),
+                         DAG.getValueType(EVT));
+  }
+  
+  // fold (sra (sra x, c1), c2) -> (sra x, c1+c2)
+  if (N1C && N0.getOpcode() == ISD::SRA) {
+    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      unsigned Sum = N1C->getValue() + C1->getValue();
+      if (Sum >= MVT::getSizeInBits(VT)) Sum = MVT::getSizeInBits(VT)-1;
+      return DAG.getNode(ISD::SRA, VT, N0.getOperand(0),
+                         DAG.getConstant(Sum, N1C->getValueType(0)));
+    }
+  }
+  
+  // Simplify, based on bits shifted out of the LHS. 
+  if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  // If the sign bit is known to be zero, switch this to a SRL.
+  if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT)))
+    return DAG.getNode(ISD::SRL, VT, N0, N1);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSRL(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT::ValueType VT = N0.getValueType();
+  unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+  
+  // fold (srl c1, c2) -> c1 >>u c2
+  if (N0C && N1C)
+    return DAG.getNode(ISD::SRL, VT, N0, N1);
+  // fold (srl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (srl x, c >= size(x)) -> undef
+  if (N1C && N1C->getValue() >= OpSizeInBits)
+    return DAG.getNode(ISD::UNDEF, VT);
+  // fold (srl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // if (srl x, c) is known to be zero, return 0
+  if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), ~0ULL >> (64-OpSizeInBits)))
+    return DAG.getConstant(0, VT);
+  
+  // fold (srl (srl x, c1), c2) -> 0 or (srl x, c1+c2)
+  if (N1C && N0.getOpcode() == ISD::SRL && 
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    uint64_t c2 = N1C->getValue();
+    if (c1 + c2 >= OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SRL, VT, N0.getOperand(0), 
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+  
+  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+    // Shifting in all undef bits?
+    MVT::ValueType SmallVT = N0.getOperand(0).getValueType();
+    if (N1C->getValue() >= MVT::getSizeInBits(SmallVT))
+      return DAG.getNode(ISD::UNDEF, VT);
+
+    SDOperand SmallShift = DAG.getNode(ISD::SRL, SmallVT, N0.getOperand(0), N1);
+    AddToWorkList(SmallShift.Val);
+    return DAG.getNode(ISD::ANY_EXTEND, VT, SmallShift);
+  }
+  
+  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
+  // bit, which is unmodified by sra.
+  if (N1C && N1C->getValue()+1 == MVT::getSizeInBits(VT)) {
+    if (N0.getOpcode() == ISD::SRA)
+      return DAG.getNode(ISD::SRL, VT, N0.getOperand(0), N1);
+  }
+  
+  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
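+  // (ctlz yields the bit width only for a zero input, so bit log2(width) of
+  // the result is set iff x == 0)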
+  if (N1C && N0.getOpcode() == ISD::CTLZ && 
+      N1C->getValue() == Log2_32(MVT::getSizeInBits(VT))) {
+    uint64_t KnownZero, KnownOne, Mask = MVT::getIntVTBitMask(VT);
+    DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+    
+    // If any of the input bits are KnownOne, then the input couldn't be all
+    // zeros, thus the result of the srl will always be zero.
+    if (KnownOne) return DAG.getConstant(0, VT);
+    
+    // If all of the bits input to the ctlz node are known to be zero, then
+    // the result of the ctlz is "32" and the result of the shift is one.
+    uint64_t UnknownBits = ~KnownZero & Mask;
+    if (UnknownBits == 0) return DAG.getConstant(1, VT);
+    
+    // Otherwise, check to see if there is exactly one bit input to the ctlz.
+    if ((UnknownBits & (UnknownBits-1)) == 0) {
+      // Okay, we know that only the single bit specified by UnknownBits
+      // could be set on input to the CTLZ node.  If this bit is set, the SRL
+      // will return 0, if it is clear, it returns 1.  Change the CTLZ/SRL pair
+      // to an SRL,XOR pair, which is likely to simplify more.
+      unsigned ShAmt = CountTrailingZeros_64(UnknownBits);
+      SDOperand Op = N0.getOperand(0);
+      if (ShAmt) {
+        Op = DAG.getNode(ISD::SRL, VT, Op,
+                         DAG.getConstant(ShAmt, TLI.getShiftAmountTy()));
+        AddToWorkList(Op.Val);
+      }
+      return DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(1, VT));
+    }
+  }
+  
+  // fold operands of srl based on knowledge that the low bits are not
+  // demanded.
+  if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCTLZ(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (ctlz c1) -> c2
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::CTLZ, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCTTZ(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (cttz c1) -> c2
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::CTTZ, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCTPOP(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (ctpop c1) -> c2
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::CTPOP, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSELECT(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand N2 = N->getOperand(2);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold select C, X, X -> X
+  if (N1 == N2)
+    return N1;
+  // fold select true, X, Y -> X
+  if (N0C && !N0C->isNullValue())
+    return N1;
+  // fold select false, X, Y -> Y
+  if (N0C && N0C->isNullValue())
+    return N2;
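+  // For an i1 select, the remaining folds are plain boolean algebra on the
+  // operands.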
+  // fold select C, 1, X -> C | X
+  if (MVT::i1 == VT && N1C && N1C->getValue() == 1)
+    return DAG.getNode(ISD::OR, VT, N0, N2);
+  // fold select C, 0, X -> ~C & X
+  // FIXME: this should check for C type == X type, not i1?
+  if (MVT::i1 == VT && N1C && N1C->isNullValue()) {
+    SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT));
+    AddToWorkList(XORNode.Val);
+    return DAG.getNode(ISD::AND, VT, XORNode, N2);
+  }
+  // fold select C, X, 1 -> ~C | X
+  if (MVT::i1 == VT && N2C && N2C->getValue() == 1) {
+    SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT));
+    AddToWorkList(XORNode.Val);
+    return DAG.getNode(ISD::OR, VT, XORNode, N1);
+  }
+  // fold select C, X, 0 -> C & X
+  // FIXME: this should check for C type == X type, not i1?
+  if (MVT::i1 == VT && N2C && N2C->isNullValue())
+    return DAG.getNode(ISD::AND, VT, N0, N1);
+  // fold  X ? X : Y --> X ? 1 : Y --> X | Y
+  if (MVT::i1 == VT && N0 == N1)
+    return DAG.getNode(ISD::OR, VT, N0, N2);
+  // fold X ? Y : X --> X ? Y : 0 --> X & Y
+  if (MVT::i1 == VT && N0 == N2)
+    return DAG.getNode(ISD::AND, VT, N0, N1);
+  
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N1, N2))
+    return SDOperand(N, 0);  // Don't revisit N.
+  
+  // fold selects based on a setcc into other things, such as min/max/abs
+  if (N0.getOpcode() == ISD::SETCC)
+    // FIXME:
+    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+    // having to say they don't support SELECT_CC on every type the DAG knows
+    // about, since there is no way to mark an opcode illegal at all value types
+    if (TLI.isOperationLegal(ISD::SELECT_CC, MVT::Other))
+      return DAG.getNode(ISD::SELECT_CC, VT, N0.getOperand(0), N0.getOperand(1),
+                         N1, N2, N0.getOperand(2));
+    else
+      return SimplifySelect(N0, N1, N2);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSELECT_CC(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand N2 = N->getOperand(2);
+  SDOperand N3 = N->getOperand(3);
+  SDOperand N4 = N->getOperand(4);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+  
+  // fold select_cc lhs, rhs, x, x, cc -> x
+  if (N2 == N3)
+    return N2;
+  
+  // Determine if the condition we're dealing with is constant
+  SDOperand SCC = SimplifySetCC(TLI.getSetCCResultTy(), N0, N1, CC, false);
+  if (SCC.Val) AddToWorkList(SCC.Val);
+
+  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.Val)) {
+    if (SCCC->getValue())
+      return N2;    // cond always true -> true val
+    else
+      return N3;    // cond always false -> false val
+  }
+  
+  // Fold to a simpler select_cc
+  if (SCC.Val && SCC.getOpcode() == ISD::SETCC)
+    return DAG.getNode(ISD::SELECT_CC, N2.getValueType(), 
+                       SCC.getOperand(0), SCC.getOperand(1), N2, N3, 
+                       SCC.getOperand(2));
+  
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N2, N3))
+    return SDOperand(N, 0);  // Don't revisit N.
+  
+  // fold select_cc into other things, such as min/max/abs
+  return SimplifySelectCC(N0, N1, N2, N3, CC);
+}
+
+SDOperand DAGCombiner::visitSETCC(SDNode *N) {
+  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+                       cast<CondCodeSDNode>(N->getOperand(2))->get());
+}
+
+SDOperand DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (sext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::SIGN_EXTEND, VT, N0);
+  
+  // fold (sext (sext x)) -> (sext x)
+  // fold (sext (aext x)) -> (sext x)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+    return DAG.getNode(ISD::SIGN_EXTEND, VT, N0.getOperand(0));
+  
+  // fold (sext (truncate (load x))) -> (sext (smaller load x))
+  // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand NarrowLoad = ReduceLoadWidth(N0.Val);
+    if (NarrowLoad.Val) {
+      if (NarrowLoad.Val != N0.Val)
+        CombineTo(N0.Val, NarrowLoad);
+      return DAG.getNode(ISD::SIGN_EXTEND, VT, NarrowLoad);
+    }
+  }
+
+  // See if the value being truncated is already sign extended.  If so, just
+  // eliminate the trunc/sext pair.
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand Op = N0.getOperand(0);
+    unsigned OpBits   = MVT::getSizeInBits(Op.getValueType());
+    unsigned MidBits  = MVT::getSizeInBits(N0.getValueType());
+    unsigned DestBits = MVT::getSizeInBits(VT);
+    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+    
+    if (OpBits == DestBits) {
+      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
+      // bits, it is already sign extended enough and can be used directly.
+      if (NumSignBits > DestBits-MidBits)
+        return Op;
+    } else if (OpBits < DestBits) {
+      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
+      // bits, just sext from i32.
+      if (NumSignBits > OpBits-MidBits)
+        return DAG.getNode(ISD::SIGN_EXTEND, VT, Op);
+    } else {
+      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
+      // bits, just truncate to i32.
+      if (NumSignBits > OpBits-MidBits)
+        return DAG.getNode(ISD::TRUNCATE, VT, Op);
+    }
+    
+    // fold (sext (truncate x)) -> (sextinreg x).
+    if (!AfterLegalize || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+                                               N0.getValueType())) {
+      if (Op.getValueType() < VT)
+        Op = DAG.getNode(ISD::ANY_EXTEND, VT, Op);
+      else if (Op.getValueType() > VT)
+        Op = DAG.getNode(ISD::TRUNCATE, VT, Op);
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, Op,
+                         DAG.getValueType(N0.getValueType()));
+    }
+  }
+  
+  // fold (sext (load x)) -> (sext (truncate (sextload x)))
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      (!AfterLegalize||TLI.isLoadXLegal(ISD::SEXTLOAD, N0.getValueType()))){
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(), 
+                                       LN0->isVolatile(),
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+  if ((ISD::isSEXTLoad(N0.Val) || ISD::isEXTLoad(N0.Val)) &&
+      ISD::isUNINDEXEDLoad(N0.Val) && N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    if (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT)) {
+      SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+                                         LN0->getBasePtr(), LN0->getSrcValue(),
+                                         LN0->getSrcValueOffset(), EVT,
+                                         LN0->isVolatile(), 
+                                         LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+                ExtLoad.getValue(1));
+      return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  
+  // sext(setcc x,y,cc) -> select_cc x, y, -1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDOperand SCC = 
+      SimplifySelectCC(N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.Val) return SCC;
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (zext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::ZERO_EXTEND, VT, N0);
+  // fold (zext (zext x)) -> (zext x)
+  // fold (zext (aext x)) -> (zext x)
+  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+    return DAG.getNode(ISD::ZERO_EXTEND, VT, N0.getOperand(0));
+
+  // fold (zext (truncate (load x))) -> (zext (smaller load x))
+  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand NarrowLoad = ReduceLoadWidth(N0.Val);
+    if (NarrowLoad.Val) {
+      if (NarrowLoad.Val != N0.Val)
+        CombineTo(N0.Val, NarrowLoad);
+      return DAG.getNode(ISD::ZERO_EXTEND, VT, NarrowLoad);
+    }
+  }
+
+  // fold (zext (truncate x)) -> (and x, mask)
+  if (N0.getOpcode() == ISD::TRUNCATE &&
+      (!AfterLegalize || TLI.isOperationLegal(ISD::AND, VT))) {
+    SDOperand Op = N0.getOperand(0);
+    if (Op.getValueType() < VT) {
+      Op = DAG.getNode(ISD::ANY_EXTEND, VT, Op);
+    } else if (Op.getValueType() > VT) {
+      Op = DAG.getNode(ISD::TRUNCATE, VT, Op);
+    }
+    return DAG.getZeroExtendInReg(Op, N0.getValueType());
+  }
+  
+  // fold (zext (and (trunc x), cst)) -> (and x, cst).
+  if (N0.getOpcode() == ISD::AND &&
+      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    SDOperand X = N0.getOperand(0).getOperand(0);
+    if (X.getValueType() < VT) {
+      X = DAG.getNode(ISD::ANY_EXTEND, VT, X);
+    } else if (X.getValueType() > VT) {
+      X = DAG.getNode(ISD::TRUNCATE, VT, X);
+    }
+    uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(Mask, VT));
+  }
+  
+  // fold (zext (load x)) -> (zext (truncate (zextload x)))
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      (!AfterLegalize||TLI.isLoadXLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+  if ((ISD::isZEXTLoad(N0.Val) || ISD::isEXTLoad(N0.Val)) &&
+      ISD::isUNINDEXEDLoad(N0.Val) && N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  
+  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDOperand SCC = 
+      SimplifySelectCC(N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.Val) return SCC;
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitANY_EXTEND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (aext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::ANY_EXTEND, VT, N0);
+  // fold (aext (aext x)) -> (aext x)
+  // fold (aext (zext x)) -> (zext x)
+  // fold (aext (sext x)) -> (sext x)
+  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
+      N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND)
+    return DAG.getNode(N0.getOpcode(), VT, N0.getOperand(0));
+  
+  // fold (aext (truncate (load x))) -> (aext (smaller load x))
+  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand NarrowLoad = ReduceLoadWidth(N0.Val);
+    if (NarrowLoad.Val) {
+      if (NarrowLoad.Val != N0.Val)
+        CombineTo(N0.Val, NarrowLoad);
+      return DAG.getNode(ISD::ANY_EXTEND, VT, NarrowLoad);
+    }
+  }
+
+  // fold (aext (truncate x))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDOperand TruncOp = N0.getOperand(0);
+    if (TruncOp.getValueType() == VT)
+      return TruncOp; // x iff x size == aext size.
+    if (TruncOp.getValueType() > VT)
+      return DAG.getNode(ISD::TRUNCATE, VT, TruncOp);
+    return DAG.getNode(ISD::ANY_EXTEND, VT, TruncOp);
+  }
+  
+  // fold (aext (and (trunc x), cst)) -> (and x, cst).
+  if (N0.getOpcode() == ISD::AND &&
+      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    SDOperand X = N0.getOperand(0).getOperand(0);
+    if (X.getValueType() < VT) {
+      X = DAG.getNode(ISD::ANY_EXTEND, VT, X);
+    } else if (X.getValueType() > VT) {
+      X = DAG.getNode(ISD::TRUNCATE, VT, X);
+    }
+    uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+    return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(Mask, VT));
+  }
+  
+  // fold (aext (load x)) -> (aext (truncate (extload x)))
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      (!AfterLegalize||TLI.isLoadXLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  
+  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
+  if (N0.getOpcode() == ISD::LOAD &&
+      !ISD::isNON_EXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) &&
+      N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType EVT = LN0->getLoadedVT();
+    SDOperand ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
+                                       LN0->getChain(), LN0->getBasePtr(),
+                                       LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  
+  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDOperand SCC = 
+      SimplifySelectCC(N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.Val)
+      return SCC;
+  }
+  
+  return SDOperand();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N bits
+/// and then truncated to a narrower type, where N is a multiple of the number
+/// of bits in the narrower type, transform it to a narrower load from the
+/// original address plus N/8 bytes. If the result is to be extended, also
+/// fold the extension to form an extending load.
+SDOperand DAGCombiner::ReduceLoadWidth(SDNode *N) {
+  unsigned Opc = N->getOpcode();
+  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  MVT::ValueType EVT = N->getValueType(0);
+
+  // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // extending back to VT.
+  if (Opc == ISD::SIGN_EXTEND_INREG) {
+    ExtType = ISD::SEXTLOAD;
+    EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+    if (AfterLegalize && !TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))
+      return SDOperand();
+  }
+
+  unsigned EVTBits = MVT::getSizeInBits(EVT);
+  unsigned ShAmt = 0;
+  bool CombineSRL =  false;
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      ShAmt = N01->getValue();
+      // Is the shift amount a multiple of the size of EVT?
+      if ((ShAmt & (EVTBits-1)) == 0) {
+        N0 = N0.getOperand(0);
+        if (MVT::getSizeInBits(N0.getValueType()) <= EVTBits)
+          return SDOperand();
+        CombineSRL = true;
+      }
+    }
+  }
+
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      // Do not allow folding to i1 here.  i1 is implicitly stored in memory in
+      // zero extended form: by shrinking the load, we lose track of the fact
+      // that it is already zero extended.
+      // FIXME: This should be reevaluated.
+      VT != MVT::i1) {
+    assert(MVT::getSizeInBits(N0.getValueType()) > EVTBits &&
+           "Cannot truncate to larger type!");
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT::ValueType PtrType = N0.getOperand(1).getValueType();
+    // For big endian targets, we need to adjust the offset to the pointer to
+    // load the correct bytes.
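+    // e.g. bits [24,32) of a big-endian i32 are its most significant byte,
+    // stored at byte offset (32-24-8)/8 == 0.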
+    if (!TLI.isLittleEndian())
+      ShAmt = MVT::getSizeInBits(N0.getValueType()) - ShAmt - EVTBits;
+    uint64_t PtrOff =  ShAmt / 8;
+    SDOperand NewPtr = DAG.getNode(ISD::ADD, PtrType, LN0->getBasePtr(),
+                                   DAG.getConstant(PtrOff, PtrType));
+    AddToWorkList(NewPtr.Val);
+    SDOperand Load = (ExtType == ISD::NON_EXTLOAD)
+      ? DAG.getLoad(VT, LN0->getChain(), NewPtr,
+                    LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                    LN0->isVolatile(), LN0->getAlignment())
+      : DAG.getExtLoad(ExtType, VT, LN0->getChain(), NewPtr,
+                       LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT,
+                       LN0->isVolatile(), LN0->getAlignment());
+    AddToWorkList(N);
+    if (CombineSRL) {
+      std::vector<SDNode*> NowDead;
+      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), NowDead);
+      CombineTo(N->getOperand(0).Val, Load);
+    } else
+      CombineTo(N0.Val, Load, Load.getValue(1));
+    if (ShAmt) {
+      if (Opc == ISD::SIGN_EXTEND_INREG)
+        return DAG.getNode(Opc, VT, Load, N->getOperand(1));
+      else
+        return DAG.getNode(Opc, VT, Load);
+    }
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  return SDOperand();
+}
+
+
+SDOperand DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  MVT::ValueType VT = N->getValueType(0);
+  MVT::ValueType EVT = cast<VTSDNode>(N1)->getVT();
+  unsigned EVTBits = MVT::getSizeInBits(EVT);
+  
+  // fold (sext_in_reg c1) -> c1
+  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0, N1);
+  
+  // If the input is already sign extended, just drop the extension.
+  if (DAG.ComputeNumSignBits(N0) >= MVT::getSizeInBits(VT)-EVTBits+1)
+    return N0;
+  
+  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT);
+  // the case where VT2 is at least as wide is handled by the sign-bit check
+  // above.
+  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      EVT < cast<VTSDNode>(N0.getOperand(1))->getVT()) {
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0.getOperand(0), N1);
+  }
+
+  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+  if (DAG.MaskedValueIsZero(N0, 1ULL << (EVTBits-1)))
+    return DAG.getZeroExtendInReg(N0, EVT);
+  
+  // fold operands of sext_in_reg based on knowledge that the top bits are not
+  // demanded.
+  if (SimplifyDemandedBits(SDOperand(N, 0)))
+    return SDOperand(N, 0);
+  
+  // fold (sext_in_reg (load x)) -> (smaller sextload x)
+  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/8))
+  SDOperand NarrowLoad = ReduceLoadWidth(N);
+  if (NarrowLoad.Val)
+    return NarrowLoad;
+
+  // fold (sext_in_reg (srl X, 24), i8) -> sra X, 24
+  // fold (sext_in_reg (srl X, 23), i8) -> sra X, 23 iff possible.
+  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
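+  // (The examples assume X : i32.  The replacement is valid whenever the
+  // bits the srl shifts in above the sign-extended field already match the
+  // sign, which is what the sign-bit count check below verifies.)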
+  if (N0.getOpcode() == ISD::SRL) {
+    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+      if (ShAmt->getValue()+EVTBits <= MVT::getSizeInBits(VT)) {
+        // We can turn this into an SRA iff the input to the SRL is already sign
+        // extended enough.
+        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+        if (MVT::getSizeInBits(VT)-(ShAmt->getValue()+EVTBits) < InSignBits)
+          return DAG.getNode(ISD::SRA, VT, N0.getOperand(0), N0.getOperand(1));
+      }
+  }
+
+  // fold (sext_in_reg (extload x)) -> (sextload x)
+  if (ISD::isEXTLoad(N0.Val) && 
+      ISD::isUNINDEXEDLoad(N0.Val) &&
+      EVT == cast<LoadSDNode>(N0)->getLoadedVT() &&
+      (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  // fold (sext_in_reg (zextload x)) -> (sextload x) iff load has one use
+  if (ISD::isZEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) &&
+      N0.hasOneUse() &&
+      EVT == cast<LoadSDNode>(N0)->getLoadedVT() &&
+      (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // noop truncate
+  if (N0.getValueType() == N->getValueType(0))
+    return N0;
+  // fold (truncate c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::TRUNCATE, VT, N0);
+  // fold (truncate (truncate x)) -> (truncate x)
+  if (N0.getOpcode() == ISD::TRUNCATE)
+    return DAG.getNode(ISD::TRUNCATE, VT, N0.getOperand(0));
+  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+      N0.getOpcode() == ISD::ANY_EXTEND) {
+    if (N0.getOperand(0).getValueType() < VT)
+      // if the source is smaller than the dest, we still need an extend
+      return DAG.getNode(N0.getOpcode(), VT, N0.getOperand(0));
+    else if (N0.getOperand(0).getValueType() > VT)
+      // if the source is larger than the dest, then we just need the truncate
+      return DAG.getNode(ISD::TRUNCATE, VT, N0.getOperand(0));
+    else
+      // if the source and dest are the same type, we can drop both the extend
+      // and the truncate
+      return N0.getOperand(0);
+  }
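+  // e.g. (i16 (truncate (i64 (zero_extend (i32 x))))) -> (i16 (truncate x)):
+  // only the low 16 bits of x survive either way.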
+
+  // fold (truncate (load x)) -> (smaller load x)
+  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/8))
+  return ReduceLoadWidth(N);
+}
+
+SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+  // Only do this before legalize, since afterward the target may be depending
+  // on the bitconvert.
+  // First check to see if this is all constant.
+  if (!AfterLegalize &&
+      N0.getOpcode() == ISD::BUILD_VECTOR && N0.Val->hasOneUse() &&
+      MVT::isVector(VT)) {
+    bool isSimple = true;
+    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+      if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+          N0.getOperand(i).getOpcode() != ISD::Constant &&
+          N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+        isSimple = false; 
+        break;
+      }
+        
+    MVT::ValueType DestEltVT = MVT::getVectorElementType(N->getValueType(0));
+    assert(!MVT::isVector(DestEltVT) &&
+           "Element type of vector ValueType must not be vector!");
+    if (isSimple) {
+      return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.Val, DestEltVT);
+    }
+  }
+  
+  // If the input is a constant, let getNode() fold it.
+  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+    SDOperand Res = DAG.getNode(ISD::BIT_CONVERT, VT, N0);
+    if (Res.Val != N) return Res;
+  }
+  
+  if (N0.getOpcode() == ISD::BIT_CONVERT)  // conv(conv(x,t1),t2) -> conv(x,t2)
+    return DAG.getNode(ISD::BIT_CONVERT, VT, N0.getOperand(0));
+
+  // fold (conv (load x)) -> (load (conv*)x)
+  // If the resultant load doesn't need a higher alignment than the original.
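+  // e.g. (f64 (bit_convert (i64 load p))) -> (f64 load p), provided f64's
+  // ABI alignment does not exceed the original load's alignment.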
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      ISD::isUNINDEXEDLoad(N0.Val) &&
+      TLI.isOperationLegal(ISD::LOAD, VT)) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    unsigned Align = TLI.getTargetMachine().getTargetData()->
+      getABITypeAlignment(MVT::getTypeForValueType(VT));
+    unsigned OrigAlign = LN0->getAlignment();
+    if (Align <= OrigAlign) {
+      SDOperand Load = DAG.getLoad(VT, LN0->getChain(), LN0->getBasePtr(),
+                                   LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                                   LN0->isVolatile(), Align);
+      AddToWorkList(N);
+      CombineTo(N0.Val, DAG.getNode(ISD::BIT_CONVERT, N0.getValueType(), Load),
+                Load.getValue(1));
+      return Load;
+    }
+  }
+  
+  return SDOperand();
+}
+
+/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the 
+/// destination element value type.
+SDOperand DAGCombiner::
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT::ValueType DstEltVT) {
+  MVT::ValueType SrcEltVT = BV->getOperand(0).getValueType();
+  
+  // If this is already the right type, we're done.
+  if (SrcEltVT == DstEltVT) return SDOperand(BV, 0);
+  
+  unsigned SrcBitSize = MVT::getSizeInBits(SrcEltVT);
+  unsigned DstBitSize = MVT::getSizeInBits(DstEltVT);
+  
+  // If this is a conversion of N elements of one type to N elements of another
+  // type, convert each element.  This handles FP<->INT cases.
+  if (SrcBitSize == DstBitSize) {
+    SmallVector<SDOperand, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, DstEltVT, BV->getOperand(i)));
+      AddToWorkList(Ops.back().Val);
+    }
+    MVT::ValueType VT =
+      MVT::getVectorType(DstEltVT,
+                         MVT::getVectorNumElements(BV->getValueType(0)));
+    return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+  }
+  
+  // Otherwise, we're growing or shrinking the elements.  To avoid having to
+  // handle annoying details of growing/shrinking FP values, we convert them to
+  // int first.
+  if (MVT::isFloatingPoint(SrcEltVT)) {
+    // Convert the input float vector to an int vector whose elements are the
+    // same size.
+    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+    MVT::ValueType IntVT = SrcEltVT == MVT::f32 ? MVT::i32 : MVT::i64;
+    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).Val;
+    SrcEltVT = IntVT;
+  }
+  
+  // Now we know the input is an integer vector.  If the output is a FP type,
+  // convert to integer elements of the destination size first, then to FP.
+  if (MVT::isFloatingPoint(DstEltVT)) {
+    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+    MVT::ValueType TmpVT = DstEltVT == MVT::f32 ? MVT::i32 : MVT::i64;
+    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).Val;
+    
+    // Next, convert to FP elements of the same size.
+    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+  }
+  
+  // Okay, we know the src/dst types are both integers of differing sizes.
+  // Handle the growing case first.
+  assert(MVT::isInteger(SrcEltVT) && MVT::isInteger(DstEltVT));
+  if (SrcBitSize < DstBitSize) {
+    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+    
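+    // e.g. v4i16 -> v2i32 on little endian: elements (a,b,c,d) combine as
+    // (b<<16 | a, d<<16 | c); the inner loop visits the inputs from most-
+    // to least-significant so the shifts line up.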
+    bool isLE = TLI.isLittleEndian();
+    SmallVector<SDOperand, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+         i += NumInputsPerOutput) {
+      uint64_t NewBits = 0;
+      bool EltIsUndef = true;
+      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+        // Shift the previously computed bits over.
+        NewBits <<= SrcBitSize;
+        SDOperand Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+        if (Op.getOpcode() == ISD::UNDEF) continue;
+        EltIsUndef = false;
+        
+        NewBits |= cast<ConstantSDNode>(Op)->getValue();
+      }
+      
+      if (EltIsUndef)
+        Ops.push_back(DAG.getNode(ISD::UNDEF, DstEltVT));
+      else
+        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+    }
+
+    MVT::ValueType VT = MVT::getVectorType(DstEltVT, Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+  }
+  
+  // Finally, this must be the case where we are shrinking elements: each input
+  // turns into multiple outputs.
+  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
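+  // e.g. v2i32 -> v4i16: each 32-bit constant splits into its low half
+  // followed by its high half (the pair is swapped below for big endian).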
+  SmallVector<SDOperand, 8> Ops;
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+        Ops.push_back(DAG.getNode(ISD::UNDEF, DstEltVT));
+      continue;
+    }
+    uint64_t OpVal = cast<ConstantSDNode>(BV->getOperand(i))->getValue();
+
+    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+      unsigned ThisVal = OpVal & ((1ULL << DstBitSize)-1);
+      OpVal >>= DstBitSize;
+      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+    }
+
+    // For big endian targets, swap the order of the pieces of each element.
+    if (!TLI.isLittleEndian())
+      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+  }
+  MVT::ValueType VT = MVT::getVectorType(DstEltVT, Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+}
+
+
+
+SDOperand DAGCombiner::visitFADD(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (fadd c1, c2) -> c1+c2
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FADD, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FADD, VT, N1, N0);
+  // fold (A + (-B)) -> A-B
+  if (isNegatibleForFree(N1) == 2)
+    return DAG.getNode(ISD::FSUB, VT, N0, GetNegatedExpression(N1, DAG));
+  // fold ((-A) + B) -> B-A
+  if (isNegatibleForFree(N0) == 2)
+    return DAG.getNode(ISD::FSUB, VT, N1, GetNegatedExpression(N0, DAG));
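+  // (isNegatibleForFree returns 2 when the negated form is cheaper than the
+  // original, so these folds never pay for an extra FNEG.)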
+  
+  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+      N0.Val->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FADD, VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FADD, VT, N0.getOperand(1), N1));
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFSUB(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (fsub c1, c2) -> c1-c2
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FSUB, VT, N0, N1);
+  // fold (0-B) -> -B (UnsafeFPMath only: for B == +0.0 the sub yields +0.0
+  // while the neg yields -0.0)
+  if (UnsafeFPMath && N0CFP && N0CFP->getValue() == 0.0) {
+    if (isNegatibleForFree(N1))
+      return GetNegatedExpression(N1, DAG);
+    return DAG.getNode(ISD::FNEG, VT, N1);
+  }
+  // fold (A-(-B)) -> A+B
+  if (isNegatibleForFree(N1))
+    return DAG.getNode(ISD::FADD, VT, N0, GetNegatedExpression(N1, DAG));
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFMUL(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (fmul c1, c2) -> c1*c2
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FMUL, VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FMUL, VT, N1, N0);
+  // fold (fmul X, 2.0) -> (fadd X, X)
+  if (N1CFP && N1CFP->isExactlyValue(+2.0))
+    return DAG.getNode(ISD::FADD, VT, N0, N0);
+  // fold (fmul X, -1.0) -> (fneg X)
+  if (N1CFP && N1CFP->isExactlyValue(-1.0))
+    return DAG.getNode(ISD::FNEG, VT, N0);
+  
+  // -X * -Y -> X*Y
+  if (char LHSNeg = isNegatibleForFree(N0)) {
+    if (char RHSNeg = isNegatibleForFree(N1)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FMUL, VT, GetNegatedExpression(N0, DAG),
+                           GetNegatedExpression(N1, DAG));
+    }
+  }
+  
+  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+      N0.Val->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FMUL, VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FMUL, VT, N0.getOperand(1), N1));
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFDIV(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold vector ops
+  if (MVT::isVector(VT)) {
+    SDOperand FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.Val) return FoldedVOp;
+  }
+  
+  // fold (fdiv c1, c2) -> c1/c2
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FDIV, VT, N0, N1);
+  
+  // -X / -Y -> X/Y
+  if (char LHSNeg = isNegatibleForFree(N0)) {
+    if (char RHSNeg = isNegatibleForFree(N1)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FDIV, VT, GetNegatedExpression(N0, DAG),
+                           GetNegatedExpression(N1, DAG));
+    }
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFREM(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (frem c1, c2) -> fmod(c1,c2)
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FREM, VT, N0, N1);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT::ValueType VT = N->getValueType(0);
+
+  if (N0CFP && N1CFP)  // Constant fold
+    return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1);
+  
+  if (N1CFP) {
+    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
+    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+    union {
+      double d;
+      int64_t i;
+    } u;
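+    // The sign of the constant is the sign bit of its IEEE representation,
+    // i.e. the sign of u.i once the bits are reinterpreted.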
+    u.d = N1CFP->getValue();
+    if (u.i >= 0)
+      return DAG.getNode(ISD::FABS, VT, N0);
+    else
+      return DAG.getNode(ISD::FNEG, VT, DAG.getNode(ISD::FABS, VT, N0));
+  }
+  
+  // copysign(fabs(x), y) -> copysign(x, y)
+  // copysign(fneg(x), y) -> copysign(x, y)
+  // copysign(copysign(x,z), y) -> copysign(x, y)
+  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+      N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, VT, N0.getOperand(0), N1);
+
+  // copysign(x, fabs(y)) -> fabs(x)
+  if (N1.getOpcode() == ISD::FABS)
+    return DAG.getNode(ISD::FABS, VT, N0);
+  
+  // copysign(x, copysign(y,z)) -> copysign(x, z)
+  if (N1.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1.getOperand(1));
+  
+  // copysign(x, fp_extend(y)) -> copysign(x, y)
+  // copysign(x, fp_round(y)) -> copysign(x, y)
+  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+    return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1.getOperand(0));
+  
+  return SDOperand();
+}
+
+
+
+SDOperand DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (sint_to_fp c1) -> c1fp
+  if (N0C)
+    return DAG.getNode(ISD::SINT_TO_FP, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+
+  // fold (uint_to_fp c1) -> c1fp
+  if (N0C)
+    return DAG.getNode(ISD::UINT_TO_FP, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fp_to_sint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_SINT, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fp_to_uint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_UINT, VT, N0);
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_ROUND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fp_round c1fp) -> c1fp
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_ROUND, VT, N0);
+  
+  // fold (fp_round (fp_extend x)) -> x
+  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+    return N0.getOperand(0);
+  
+  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.Val->hasOneUse()) {
+    SDOperand Tmp = DAG.getNode(ISD::FP_ROUND, VT, N0.getOperand(0));
+    AddToWorkList(Tmp.Val);
+    return DAG.getNode(ISD::FCOPYSIGN, VT, Tmp, N0.getOperand(1));
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  MVT::ValueType VT = N->getValueType(0);
+  MVT::ValueType EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  
+  // fold (fp_round_inreg c1fp) -> c1fp
+  if (N0CFP) {
+    SDOperand Round = DAG.getConstantFP(N0CFP->getValue(), EVT);
+    return DAG.getNode(ISD::FP_EXTEND, VT, Round);
+  }
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_EXTEND(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fp_extend c1fp) -> c1fp
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_EXTEND, VT, N0);
+  
+  // fold (fpext (load x)) -> (fpext (fpround (extload x)))
+  if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+      (!AfterLegalize||TLI.isLoadXLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDOperand ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), 
+                                       LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.Val, DAG.getNode(ISD::FP_ROUND, N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDOperand(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFNEG(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+
+  if (isNegatibleForFree(N0))
+    return GetNegatedExpression(N0, DAG);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFABS(SDNode *N) {
+  SDOperand N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // fold (fabs c1) -> fabs(c1)
+  if (N0CFP)
+    return DAG.getNode(ISD::FABS, VT, N0);
+  // fold (fabs (fabs x)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FABS)
+    return N->getOperand(0);
+  // fold (fabs (fneg x)) -> (fabs x)
+  // fold (fabs (fcopysign x, y)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FABS, VT, N0.getOperand(0));
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitBRCOND(SDNode *N) {
+  SDOperand Chain = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  SDOperand N2 = N->getOperand(2);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  
+  // never taken branch, fold to chain
+  if (N1C && N1C->isNullValue())
+    return Chain;
+  // unconditional branch
+  if (N1C && N1C->getValue() == 1)
+    return DAG.getNode(ISD::BR, MVT::Other, Chain, N2);
+  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+  // on the target.
+  if (N1.getOpcode() == ISD::SETCC && 
+      TLI.isOperationLegal(ISD::BR_CC, MVT::Other)) {
+    return DAG.getNode(ISD::BR_CC, MVT::Other, Chain, N1.getOperand(2),
+                       N1.getOperand(0), N1.getOperand(1), N2);
+  }
+  return SDOperand();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDOperand DAGCombiner::visitBR_CC(SDNode *N) {
+  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+  SDOperand CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+  
+  // Use SimplifySetCC to simplify SETCC nodes.
+  SDOperand Simp = SimplifySetCC(MVT::i1, CondLHS, CondRHS, CC->get(), false);
+  if (Simp.Val) AddToWorkList(Simp.Val);
+
+  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.Val);
+
+  // fold br_cc true, dest -> br dest (unconditional branch)
+  if (SCCC && SCCC->getValue())
+    return DAG.getNode(ISD::BR, MVT::Other, N->getOperand(0),
+                       N->getOperand(4));
+  // fold br_cc false, dest -> unconditional fall through
+  if (SCCC && SCCC->isNullValue())
+    return N->getOperand(0);
+
+  // fold to a simpler setcc
+  if (Simp.Val && Simp.getOpcode() == ISD::SETCC)
+    return DAG.getNode(ISD::BR_CC, MVT::Other, N->getOperand(0), 
+                       Simp.getOperand(2), Simp.getOperand(0),
+                       Simp.getOperand(1), N->getOperand(4));
+  return SDOperand();
+}
+
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in, and all of its other uses are redirected to the
+/// new load / store.
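+///
+/// For example, "x = add p, 4; load [x]" (where x has other uses) can become
+/// a pre-indexed load producing both the loaded value and the updated x,
+/// assuming the target supports pre-increment addressing.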
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+  if (!AfterLegalize)
+    return false;
+
+  bool isLoad = true;
+  SDOperand Ptr;
+  MVT::ValueType VT;
+  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
+    if (LD->getAddressingMode() != ISD::UNINDEXED)
+      return false;
+    VT = LD->getLoadedVT();
+    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
+    if (ST->getAddressingMode() != ISD::UNINDEXED)
+      return false;
+    VT = ST->getStoredVT();
+    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else
+    return false;
+
+  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+  // out.  There is no reason to make this a preinc/predec.
+  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+      Ptr.Val->hasOneUse())
+    return false;
+
+  // Ask the target to do addressing mode selection.
+  SDOperand BasePtr;
+  SDOperand Offset;
+  ISD::MemIndexedMode AM = ISD::UNINDEXED;
+  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+    return false;
+  // Don't create an indexed load / store with zero offset.
+  if (isa<ConstantSDNode>(Offset) &&
+      cast<ConstantSDNode>(Offset)->getValue() == 0)
+    return false;
+  
+  // Try turning it into a pre-indexed load / store except when:
+  // 1) The new base ptr is a frame index.
+  // 2) If N is a store and the new base ptr is either the same as or is a
+  //    predecessor of the value being stored.
+  // 3) Another use of the old base ptr is a predecessor of N. If the ptr is
+  //    folded, that would create a cycle.
+  // 4) All uses are load / store ops that use it as old base ptr.
+
+  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
+  // (plus the implicit offset) to a register to preinc anyway.
+  if (isa<FrameIndexSDNode>(BasePtr))
+    return false;
+  
+  // Check #2.
+  if (!isLoad) {
+    SDOperand Val = cast<StoreSDNode>(N)->getValue();
+    if (Val == BasePtr || BasePtr.Val->isPredecessor(Val.Val))
+      return false;
+  }
+
+  // Now check for #3 and #4.
+  bool RealUse = false;
+  for (SDNode::use_iterator I = Ptr.Val->use_begin(),
+         E = Ptr.Val->use_end(); I != E; ++I) {
+    SDNode *Use = *I;
+    if (Use == N)
+      continue;
+    if (Use->isPredecessor(N))
+      return false;
+
+    if (!((Use->getOpcode() == ISD::LOAD &&
+           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+          (Use->getOpcode() == ISD::STORE &&
+           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+      RealUse = true;
+  }
+  if (!RealUse)
+    return false;
+
+  SDOperand Result;
+  if (isLoad)
+    Result = DAG.getIndexedLoad(SDOperand(N,0), BasePtr, Offset, AM);
+  else
+    Result = DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM);
+  ++PreIndexedNodes;
+  ++NodesCombined;
+  DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG));
+  DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG));
+  DOUT << '\n';
+  std::vector<SDNode*> NowDead;
+  if (isLoad) {
+    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0),
+                                  NowDead);
+    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2),
+                                  NowDead);
+  } else {
+    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1),
+                                  NowDead);
+  }
+
+  // Nodes can end up on the worklist more than once.  Make sure we do
+  // not process a node that has been replaced.
+  for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+    removeFromWorkList(NowDead[i]);
+  // Finally, since the node is now dead, remove it from the graph.
+  DAG.DeleteNode(N);
+
+  // Replace the uses of Ptr with uses of the updated base value.
+  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+                                NowDead);
+  removeFromWorkList(Ptr.Val);
+  for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+    removeFromWorkList(NowDead[i]);
+  DAG.DeleteNode(Ptr.Val);
+
+  return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation effectively folds the add / subtract into the new
+/// indexed load / store, and all uses of the add / sub are redirected to
+/// the new load / store.
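+///
+/// For example, "load [p]; q = add p, 4" can become a post-indexed load
+/// producing both the value at p and the incremented pointer q, assuming
+/// the target supports post-increment addressing.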
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+  if (!AfterLegalize)
+    return false;
+
+  bool isLoad = true;
+  SDOperand Ptr;
+  MVT::ValueType VT;
+  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
+    if (LD->getAddressingMode() != ISD::UNINDEXED)
+      return false;
+    VT = LD->getLoadedVT();
+    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
+    if (ST->getAddressingMode() != ISD::UNINDEXED)
+      return false;
+    VT = ST->getStoredVT();
+    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else
+    return false;
+
+  if (Ptr.Val->hasOneUse())
+    return false;
+  
+  for (SDNode::use_iterator I = Ptr.Val->use_begin(),
+         E = Ptr.Val->use_end(); I != E; ++I) {
+    SDNode *Op = *I;
+    if (Op == N ||
+        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+      continue;
+
+    SDOperand BasePtr;
+    SDOperand Offset;
+    ISD::MemIndexedMode AM = ISD::UNINDEXED;
+    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      if (Ptr == Offset)
+        std::swap(BasePtr, Offset);
+      if (Ptr != BasePtr)
+        continue;
+      // Don't create an indexed load / store with zero offset.
+      if (isa<ConstantSDNode>(Offset) &&
+          cast<ConstantSDNode>(Offset)->getValue() == 0)
+        continue;
+
+      // Try turning it into a post-indexed load / store except when:
+      // 1) All uses are load / store ops that use it as the base ptr.
+      // 2) Op must be independent of N, i.e. Op is neither a predecessor
+      //    nor a successor of N. Otherwise, if Op is folded, that would
+      //    create a cycle.
+
+      // Check for #1.
+      bool TryNext = false;
+      for (SDNode::use_iterator II = BasePtr.Val->use_begin(),
+             EE = BasePtr.Val->use_end(); II != EE; ++II) {
+        SDNode *Use = *II;
+        if (Use == Ptr.Val)
+          continue;
+
+        // If all the uses are load / store addresses, then don't do the
+        // transformation.
+        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+          bool RealUse = false;
+          for (SDNode::use_iterator III = Use->use_begin(),
+                 EEE = Use->use_end(); III != EEE; ++III) {
+            SDNode *UseUse = *III;
+            if (!((UseUse->getOpcode() == ISD::LOAD &&
+                   cast<LoadSDNode>(UseUse)->getBasePtr().Val == Use) ||
+                  (UseUse->getOpcode() == ISD::STORE &&
+                   cast<StoreSDNode>(UseUse)->getBasePtr().Val == Use)))
+              RealUse = true;
+          }
+
+          if (!RealUse) {
+            TryNext = true;
+            break;
+          }
+        }
+      }
+      if (TryNext)
+        continue;
+
+      // Check for #2
+      if (!Op->isPredecessor(N) && !N->isPredecessor(Op)) {
+        SDOperand Result = isLoad
+          ? DAG.getIndexedLoad(SDOperand(N,0), BasePtr, Offset, AM)
+          : DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM);
+        ++PostIndexedNodes;
+        ++NodesCombined;
+        DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG));
+        DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG));
+        DOUT << '\n';
+        std::vector<SDNode*> NowDead;
+        if (isLoad) {
+          DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0),
+                                        NowDead);
+          DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2),
+                                        NowDead);
+        } else {
+          DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1),
+                                        NowDead);
+        }
+
+        // Nodes can end up on the worklist more than once.  Make sure we do
+        // not process a node that has been replaced.
+        for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+          removeFromWorkList(NowDead[i]);
+        // Finally, since the node is now dead, remove it from the graph.
+        DAG.DeleteNode(N);
+
+        // Replace the uses of Use with uses of the updated base value.
+        DAG.ReplaceAllUsesOfValueWith(SDOperand(Op, 0),
+                                      Result.getValue(isLoad ? 1 : 0),
+                                      NowDead);
+        removeFromWorkList(Op);
+        for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+          removeFromWorkList(NowDead[i]);
+        DAG.DeleteNode(Op);
+
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+SDOperand DAGCombiner::visitLOAD(SDNode *N) {
+  LoadSDNode *LD  = cast<LoadSDNode>(N);
+  SDOperand Chain = LD->getChain();
+  SDOperand Ptr   = LD->getBasePtr();
+
+  // If load is not volatile and there are no uses of the loaded value (and
+  // the updated indexed value in case of indexed loads), change uses of the
+  // chain value into uses of the chain input (i.e. delete the dead load).
+  if (!LD->isVolatile()) {
+    if (N->getValueType(1) == MVT::Other) {
+      // Unindexed loads.
+      if (N->hasNUsesOfValue(0, 0))
+        return CombineTo(N, DAG.getNode(ISD::UNDEF, N->getValueType(0)), Chain);
+    } else {
+      // Indexed loads.
+      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+        SDOperand Undef0 = DAG.getNode(ISD::UNDEF, N->getValueType(0));
+        SDOperand Undef1 = DAG.getNode(ISD::UNDEF, N->getValueType(1));
+        SDOperand To[] = { Undef0, Undef1, Chain };
+        return CombineTo(N, To, 3);
+      }
+    }
+  }
+  
+  // If this load is directly stored, replace the load value with the stored
+  // value.
+  // TODO: Handle store large -> read small portion.
+  // TODO: Handle TRUNCSTORE/LOADEXT
+  if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+    if (ISD::isNON_TRUNCStore(Chain.Val)) {
+      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+      if (PrevST->getBasePtr() == Ptr &&
+          PrevST->getValue().getValueType() == N->getValueType(0))
+        return CombineTo(N, Chain.getOperand(1), Chain);
+    }
+  }
+    
+  if (CombinerAA) {
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDOperand BetterChain = FindBetterChain(N, Chain);
+    
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      SDOperand ReplLoad;
+
+      // Replace the chain to avoid dependency.
+      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+        ReplLoad = DAG.getLoad(N->getValueType(0), BetterChain, Ptr,
+                              LD->getSrcValue(), LD->getSrcValueOffset(),
+                              LD->isVolatile(), LD->getAlignment());
+      } else {
+        ReplLoad = DAG.getExtLoad(LD->getExtensionType(),
+                                  LD->getValueType(0),
+                                  BetterChain, Ptr, LD->getSrcValue(),
+                                  LD->getSrcValueOffset(),
+                                  LD->getLoadedVT(),
+                                  LD->isVolatile(), 
+                                  LD->getAlignment());
+      }
+
+      // Create token factor to keep old chain connected.
+      SDOperand Token = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                                    Chain, ReplLoad.getValue(1));
+      
+      // Replace uses with load result and token factor. Don't add users
+      // to work list.
+      return CombineTo(N, ReplLoad.getValue(0), Token, false);
+    }
+  }
+
+  // Try transforming N to an indexed load.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDOperand(N, 0);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSTORE(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  SDOperand Chain = ST->getChain();
+  SDOperand Value = ST->getValue();
+  SDOperand Ptr   = ST->getBasePtr();
+  
+  // If this is a store of a bit convert, store the input value if the
+  // resultant store does not need a higher alignment than the original.
+  if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+      ST->getAddressingMode() == ISD::UNINDEXED) {
+    unsigned Align = ST->getAlignment();
+    MVT::ValueType SVT = Value.getOperand(0).getValueType();
+    unsigned OrigAlign = TLI.getTargetMachine().getTargetData()->
+      getABITypeAlignment(MVT::getTypeForValueType(SVT));
+    if (Align <= OrigAlign && TLI.isOperationLegal(ISD::STORE, SVT))
+      return DAG.getStore(Chain, Value.getOperand(0), Ptr, ST->getSrcValue(),
+                          ST->getSrcValueOffset(), ST->isVolatile(), Align);
+  }
+  
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+    if (Value.getOpcode() != ISD::TargetConstantFP) {
+      SDOperand Tmp;
+      switch (CFP->getValueType(0)) {
+      default: assert(0 && "Unknown FP type");
+      case MVT::f32:
+        if (!AfterLegalize || TLI.isTypeLegal(MVT::i32)) {
+          Tmp = DAG.getConstant(FloatToBits(CFP->getValue()), MVT::i32);
+          return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(),
+                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              ST->getAlignment());
+        }
+        break;
+      case MVT::f64:
+        if (!AfterLegalize || TLI.isTypeLegal(MVT::i64)) {
+          Tmp = DAG.getConstant(DoubleToBits(CFP->getValue()), MVT::i64);
+          return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(),
+                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              ST->getAlignment());
+        } else if (TLI.isTypeLegal(MVT::i32)) {
+          // Many FP stores do not become apparent until after legalize, e.g.
+          // for argument passing.  Since this is so common, custom legalize
+          // the 64-bit integer store into two 32-bit stores.
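+          // e.g. storing 1.0 (bits 0x3FF0000000000000) becomes stores of
+          // 0x00000000 at Ptr and 0x3FF00000 at Ptr+4 on little endian.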
+          uint64_t Val = DoubleToBits(CFP->getValue());
+          SDOperand Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+          SDOperand Hi = DAG.getConstant(Val >> 32, MVT::i32);
+          if (!TLI.isLittleEndian()) std::swap(Lo, Hi);
+
+          int SVOffset = ST->getSrcValueOffset();
+          unsigned Alignment = ST->getAlignment();
+          bool isVolatile = ST->isVolatile();
+
+          SDOperand St0 = DAG.getStore(Chain, Lo, Ptr, ST->getSrcValue(),
+                                       SVOffset, isVolatile, Alignment);
+          Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr,
+                            DAG.getConstant(4, Ptr.getValueType()));
+          SVOffset += 4;
+          if (Alignment > 4)
+            Alignment = 4;
+          SDOperand St1 = DAG.getStore(Chain, Hi, Ptr, ST->getSrcValue(),
+                                       SVOffset, isVolatile, Alignment);
+          return DAG.getNode(ISD::TokenFactor, MVT::Other, St0, St1);
+        }
+        break;
+      }
+    }
+  }
+
+  if (CombinerAA) { 
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDOperand BetterChain = FindBetterChain(N, Chain);
+    
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      // Replace the chain to avoid dependency.
+      SDOperand ReplStore;
+      if (ST->isTruncatingStore()) {
+        ReplStore = DAG.getTruncStore(BetterChain, Value, Ptr,
+          ST->getSrcValue(), ST->getSrcValueOffset(), ST->getStoredVT(),
+          ST->isVolatile(), ST->getAlignment());
+      } else {
+        ReplStore = DAG.getStore(BetterChain, Value, Ptr,
+          ST->getSrcValue(), ST->getSrcValueOffset(),
+          ST->isVolatile(), ST->getAlignment());
+      }
+      
+      // Create token to keep both nodes around.
+      SDOperand Token =
+        DAG.getNode(ISD::TokenFactor, MVT::Other, Chain, ReplStore);
+        
+      // Don't add users to work list.
+      return CombineTo(N, Token, false);
+    }
+  }
+  
+  // Try transforming N to an indexed store.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDOperand(N, 0);
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+  SDOperand InVec = N->getOperand(0);
+  SDOperand InVal = N->getOperand(1);
+  SDOperand EltNo = N->getOperand(2);
+  
+  // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+  // vector with the inserted element.
+  if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+    unsigned Elt = cast<ConstantSDNode>(EltNo)->getValue();
+    SmallVector<SDOperand, 8> Ops(InVec.Val->op_begin(), InVec.Val->op_end());
+    // Only fold when the index is in range; otherwise we would silently
+    // drop the inserted value.
+    if (Elt < Ops.size()) {
+      Ops[Elt] = InVal;
+      return DAG.getNode(ISD::BUILD_VECTOR, InVec.getValueType(),
+                         &Ops[0], Ops.size());
+    }
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  unsigned NumInScalars = N->getNumOperands();
+  MVT::ValueType VT = N->getValueType(0);
+  unsigned NumElts = MVT::getVectorNumElements(VT);
+  MVT::ValueType EltType = MVT::getVectorElementType(VT);
+
+  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+  // at most two distinct vectors, turn this into a shuffle node.
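+  // e.g. for 4-element vectors, (build_vector (extract_elt V1, 0),
+  //      (extract_elt V2, 0), (extract_elt V1, 1), (extract_elt V2, 1))
+  // becomes (vector_shuffle V1, V2, <0, 4, 1, 5>).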
+  SDOperand VecIn1, VecIn2;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    // Ignore undef inputs.
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    
+    // If this input is something other than a EXTRACT_VECTOR_ELT with a
+    // constant index, bail out.
+    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+      VecIn1 = VecIn2 = SDOperand(0, 0);
+      break;
+    }
+    
+    // If the input vector type disagrees with the result of the build_vector,
+    // we can't make a shuffle.
+    SDOperand ExtractedFromVec = N->getOperand(i).getOperand(0);
+    if (ExtractedFromVec.getValueType() != VT) {
+      VecIn1 = VecIn2 = SDOperand(0, 0);
+      break;
+    }
+    
+    // Otherwise, remember this.  We allow up to two distinct input vectors.
+    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+      continue;
+    
+    if (VecIn1.Val == 0) {
+      VecIn1 = ExtractedFromVec;
+    } else if (VecIn2.Val == 0) {
+      VecIn2 = ExtractedFromVec;
+    } else {
+      // Too many inputs.
+      VecIn1 = VecIn2 = SDOperand(0, 0);
+      break;
+    }
+  }
+  
+  // If everything is good, we can make a shuffle operation.
+  if (VecIn1.Val) {
+    SmallVector<SDOperand, 8> BuildVecIndices;
+    for (unsigned i = 0; i != NumInScalars; ++i) {
+      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+        BuildVecIndices.push_back(DAG.getNode(ISD::UNDEF, TLI.getPointerTy()));
+        continue;
+      }
+      
+      SDOperand Extract = N->getOperand(i);
+      
+      // If extracting from the first vector, just use the index directly.
+      if (Extract.getOperand(0) == VecIn1) {
+        BuildVecIndices.push_back(Extract.getOperand(1));
+        continue;
+      }
+
+      // Otherwise, use InIdx + NumInScalars.
+      unsigned Idx = cast<ConstantSDNode>(Extract.getOperand(1))->getValue();
+      BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars,
+                                                TLI.getPointerTy()));
+    }
+    
+    // The shuffle mask is a build_vector of pointer-sized element indices.
+    MVT::ValueType BuildVecVT =
+      MVT::getVectorType(TLI.getPointerTy(), NumElts);
+    
+    // Return the new VECTOR_SHUFFLE node.
+    SDOperand Ops[3];
+    Ops[0] = VecIn1;
+    if (VecIn2.Val) {
+      Ops[1] = VecIn2;
+    } else {
+      // Use an undef build_vector as input for the second operand.
+      std::vector<SDOperand> UnOps(NumInScalars,
+                                   DAG.getNode(ISD::UNDEF, 
+                                               EltType));
+      Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, VT,
+                           &UnOps[0], UnOps.size());
+      AddToWorkList(Ops[1].Val);
+    }
+    Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, BuildVecVT,
+                         &BuildVecIndices[0], BuildVecIndices.size());
+    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Ops, 3);
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+  // EXTRACT_SUBVECTOR operations.  If so, and if the EXTRACT_SUBVECTOR vector
+  // inputs come from at most two distinct vectors, turn this into a shuffle
+  // node.
+
+  // If we only have one input vector, we don't need to do any concatenation.
+  if (N->getNumOperands() == 1) {
+    return N->getOperand(0);
+  }
+
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+  SDOperand ShufMask = N->getOperand(2);
+  unsigned NumElts = ShufMask.getNumOperands();
+
+  // If the shuffle mask is an identity operation on the LHS, return the LHS.
+  bool isIdentity = true;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
+        cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() != i) {
+      isIdentity = false;
+      break;
+    }
+  }
+  if (isIdentity) return N->getOperand(0);
+
+  // If the shuffle mask is an identity operation on the RHS, return the RHS.
+  isIdentity = true;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
+        cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() != i+NumElts) {
+      isIdentity = false;
+      break;
+    }
+  }
+  if (isIdentity) return N->getOperand(1);
+
+  // Check if the shuffle is a unary shuffle, i.e. one of the vectors is not
+  // needed at all.
+  bool isUnary = true;
+  bool isSplat = true;
+  int VecNum = -1;
+  unsigned BaseIdx = 0;
+  for (unsigned i = 0; i != NumElts; ++i)
+    if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) {
+      unsigned Idx = cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue();
+      int V = (Idx < NumElts) ? 0 : 1;
+      if (VecNum == -1) {
+        VecNum = V;
+        BaseIdx = Idx;
+      } else {
+        if (BaseIdx != Idx)
+          isSplat = false;
+        if (VecNum != V) {
+          isUnary = false;
+          break;
+        }
+      }
+    }
+
+  SDOperand N0 = N->getOperand(0);
+  SDOperand N1 = N->getOperand(1);
+  // Normalize unary shuffle so the RHS is undef.
+  if (isUnary && VecNum == 1)
+    std::swap(N0, N1);
+
+  // If it is a splat, check if the argument vector is a build_vector with
+  // all scalar elements the same.
+  if (isSplat) {
+    SDNode *V = N0.Val;
+
+    // If this is a bit convert that changes the element type of the vector but
+    // not the number of vector elements, look through it.  Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+    if (V->getOpcode() == ISD::BIT_CONVERT) {
+      SDOperand ConvInput = V->getOperand(0);
+      if (MVT::getVectorNumElements(ConvInput.getValueType()) == NumElts)
+        V = ConvInput.Val;
+    }
+
+    if (V->getOpcode() == ISD::BUILD_VECTOR) {
+      unsigned NumElems = V->getNumOperands();
+      if (NumElems > BaseIdx) {
+        SDOperand Base;
+        bool AllSame = true;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+            Base = V->getOperand(i);
+            break;
+          }
+        }
+        // Splat of <u, u, u, u>, return <u, u, u, u>
+        if (!Base.Val)
+          return N0;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i).getOpcode() != ISD::UNDEF &&
+              V->getOperand(i) != Base) {
+            AllSame = false;
+            break;
+          }
+        }
+        // Splat of <x, x, x, x>, return <x, x, x, x>
+        if (AllSame)
+          return N0;
+      }
+    }
+  }
+
+  // If it is a unary or the LHS and the RHS are the same node, turn the RHS
+  // into an undef.
+  if (isUnary || N0 == N1) {
+    // Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the
+    // first operand.
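+    // e.g. with N0 == N1 and 4 elements, mask <4,5,6,7> becomes <0,1,2,3>
+    // and the RHS becomes undef.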
+    SmallVector<SDOperand, 8> MappedOps;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF ||
+          cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() < NumElts) {
+        MappedOps.push_back(ShufMask.getOperand(i));
+      } else {
+        unsigned NewIdx = 
+          cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() - NumElts;
+        MappedOps.push_back(DAG.getConstant(NewIdx, MVT::i32));
+      }
+    }
+    ShufMask = DAG.getNode(ISD::BUILD_VECTOR, ShufMask.getValueType(),
+                           &MappedOps[0], MappedOps.size());
+    AddToWorkList(ShufMask.Val);
+    return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getValueType(0),
+                       N0,
+                       DAG.getNode(ISD::UNDEF, N->getValueType(0)),
+                       ShufMask);
+  }
+ 
+  return SDOperand();
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
+/// transform an AND into a vector_shuffle with the destination vector and a
+/// zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
+///      vector_shuffle V, Zero, <0, 4, 2, 4>
+SDOperand DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  SDOperand LHS = N->getOperand(0);
+  SDOperand RHS = N->getOperand(1);
+  if (N->getOpcode() == ISD::AND) {
+    if (RHS.getOpcode() == ISD::BIT_CONVERT)
+      RHS = RHS.getOperand(0);
+    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+      std::vector<SDOperand> IdxOps;
+      unsigned NumOps = RHS.getNumOperands();
+      unsigned NumElts = NumOps;
+      MVT::ValueType EVT = MVT::getVectorElementType(RHS.getValueType());
+      for (unsigned i = 0; i != NumElts; ++i) {
+        SDOperand Elt = RHS.getOperand(i);
+        if (!isa<ConstantSDNode>(Elt))
+          return SDOperand();
+        else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+          IdxOps.push_back(DAG.getConstant(i, EVT));
+        else if (cast<ConstantSDNode>(Elt)->isNullValue())
+          IdxOps.push_back(DAG.getConstant(NumElts, EVT));
+        else
+          return SDOperand();
+      }
+
+      // Let's see if the target supports this vector_shuffle.
+      if (!TLI.isVectorClearMaskLegal(IdxOps, EVT, DAG))
+        return SDOperand();
+
+      // Return the new VECTOR_SHUFFLE node.
+      MVT::ValueType VT = MVT::getVectorType(EVT, NumElts);
+      std::vector<SDOperand> Ops;
+      LHS = DAG.getNode(ISD::BIT_CONVERT, VT, LHS);
+      Ops.push_back(LHS);
+      AddToWorkList(LHS.Val);
+      std::vector<SDOperand> ZeroOps(NumElts, DAG.getConstant(0, EVT));
+      Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, VT,
+                                &ZeroOps[0], ZeroOps.size()));
+      Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, VT,
+                                &IdxOps[0], IdxOps.size()));
+      SDOperand Result = DAG.getNode(ISD::VECTOR_SHUFFLE, VT,
+                                     &Ops[0], Ops.size());
+      if (VT != LHS.getValueType()) {
+        Result = DAG.getNode(ISD::BIT_CONVERT, LHS.getValueType(), Result);
+      }
+      return Result;
+    }
+  }
+  return SDOperand();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDOperand DAGCombiner::SimplifyVBinOp(SDNode *N) {
+  // After legalize, the target may be depending on adds and other
+  // binary ops to provide legal ways to construct constants or other
+  // things. Simplifying them may result in a loss of legality.
+  if (AfterLegalize) return SDOperand();
+
+  MVT::ValueType VT = N->getValueType(0);
+  assert(MVT::isVector(VT) && "SimplifyVBinOp only works on vectors!");
+
+  MVT::ValueType EltType = MVT::getVectorElementType(VT);
+  SDOperand LHS = N->getOperand(0);
+  SDOperand RHS = N->getOperand(1);
+  SDOperand Shuffle = XformToShuffleWithZero(N);
+  if (Shuffle.Val) return Shuffle;
+
+  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+  // this operation.
+  if (LHS.getOpcode() == ISD::BUILD_VECTOR && 
+      RHS.getOpcode() == ISD::BUILD_VECTOR) {
+    SmallVector<SDOperand, 8> Ops;
+    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+      SDOperand LHSOp = LHS.getOperand(i);
+      SDOperand RHSOp = RHS.getOperand(i);
+      // If these two elements can't be folded, bail out.
+      if ((LHSOp.getOpcode() != ISD::UNDEF &&
+           LHSOp.getOpcode() != ISD::Constant &&
+           LHSOp.getOpcode() != ISD::ConstantFP) ||
+          (RHSOp.getOpcode() != ISD::UNDEF &&
+           RHSOp.getOpcode() != ISD::Constant &&
+           RHSOp.getOpcode() != ISD::ConstantFP))
+        break;
+      // Can't fold divide by zero.
+      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+          N->getOpcode() == ISD::FDIV) {
+        if ((RHSOp.getOpcode() == ISD::Constant &&
+             cast<ConstantSDNode>(RHSOp.Val)->isNullValue()) ||
+            (RHSOp.getOpcode() == ISD::ConstantFP &&
+             !cast<ConstantFPSDNode>(RHSOp.Val)->getValue()))
+          break;
+      }
+      Ops.push_back(DAG.getNode(N->getOpcode(), EltType, LHSOp, RHSOp));
+      AddToWorkList(Ops.back().Val);
+      assert((Ops.back().getOpcode() == ISD::UNDEF ||
+              Ops.back().getOpcode() == ISD::Constant ||
+              Ops.back().getOpcode() == ISD::ConstantFP) &&
+             "Scalar binop didn't fold!");
+    }
+    
+    if (Ops.size() == LHS.getNumOperands()) {
+      MVT::ValueType VT = LHS.getValueType();
+      return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+    }
+  }
+  
+  return SDOperand();
+}
+
+SDOperand DAGCombiner::SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2){
+  assert(N0.getOpcode() == ISD::SETCC &&
+         "First argument must be a SetCC node!");
+  
+  SDOperand SCC = SimplifySelectCC(N0.getOperand(0), N0.getOperand(1), N1, N2,
+                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());
+  // If we got a simplified select_cc node back from SimplifySelectCC, then
+  // break it down into a new SETCC node, and a new SELECT node, and then return
+  // the SELECT node, since we were called with a SELECT node.
+  if (SCC.Val) {
+    // Check to see if we got a select_cc back (to turn into setcc/select).
+    // Otherwise, just return whatever node we got back, like fabs.
+    if (SCC.getOpcode() == ISD::SELECT_CC) {
+      SDOperand SETCC = DAG.getNode(ISD::SETCC, N0.getValueType(),
+                                    SCC.getOperand(0), SCC.getOperand(1), 
+                                    SCC.getOperand(4));
+      AddToWorkList(SETCC.Val);
+      return DAG.getNode(ISD::SELECT, SCC.getValueType(), SCC.getOperand(2),
+                         SCC.getOperand(3), SETCC);
+    }
+    return SCC;
+  }
+  return SDOperand();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select.  Callers of this should assume that TheSelect is deleted if this
+/// returns true.  As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+///
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDOperand LHS, 
+                                    SDOperand RHS) {
+  
+  // If this is a select from two identical things, try to pull the operation
+  // through the select.
+  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
+    // If this is a load and the token chain is identical, replace the select
+    // of two loads with a load through a select of the address to load from.
+    // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+    // constants have been dropped into the constant pool.
+    if (LHS.getOpcode() == ISD::LOAD &&
+        // Token chains must be identical.
+        LHS.getOperand(0) == RHS.getOperand(0)) {
+      LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+      LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+      // If this is an EXTLOAD, the VTs must match.
+      if (LLD->getLoadedVT() == RLD->getLoadedVT()) {
+        // FIXME: this conflates two src values, discarding one.  This is not
+        // the right thing to do, but nothing uses srcvalues now.  When they do,
+        // turn SrcValue into a list of locations.
+        SDOperand Addr;
+        if (TheSelect->getOpcode() == ISD::SELECT) {
+          // Check that the condition doesn't reach either load.  If so, folding
+          // this will induce a cycle into the DAG.
+          if (!LLD->isPredecessor(TheSelect->getOperand(0).Val) &&
+              !RLD->isPredecessor(TheSelect->getOperand(0).Val)) {
+            Addr = DAG.getNode(ISD::SELECT, LLD->getBasePtr().getValueType(),
+                               TheSelect->getOperand(0), LLD->getBasePtr(),
+                               RLD->getBasePtr());
+          }
+        } else {
+          // Check that the condition doesn't reach either load.  If so, folding
+          // this will induce a cycle into the DAG.
+          if (!LLD->isPredecessor(TheSelect->getOperand(0).Val) &&
+              !RLD->isPredecessor(TheSelect->getOperand(0).Val) &&
+              !LLD->isPredecessor(TheSelect->getOperand(1).Val) &&
+              !RLD->isPredecessor(TheSelect->getOperand(1).Val)) {
+            Addr = DAG.getNode(ISD::SELECT_CC, LLD->getBasePtr().getValueType(),
+                             TheSelect->getOperand(0),
+                             TheSelect->getOperand(1), 
+                             LLD->getBasePtr(), RLD->getBasePtr(),
+                             TheSelect->getOperand(4));
+          }
+        }
+        
+        if (Addr.Val) {
+          SDOperand Load;
+          if (LLD->getExtensionType() == ISD::NON_EXTLOAD)
+            Load = DAG.getLoad(TheSelect->getValueType(0), LLD->getChain(),
+                               Addr, LLD->getSrcValue(),
+                               LLD->getSrcValueOffset(),
+                               LLD->isVolatile(), 
+                               LLD->getAlignment());
+          else {
+            Load = DAG.getExtLoad(LLD->getExtensionType(),
+                                  TheSelect->getValueType(0),
+                                  LLD->getChain(), Addr, LLD->getSrcValue(),
+                                  LLD->getSrcValueOffset(),
+                                  LLD->getLoadedVT(),
+                                  LLD->isVolatile(), 
+                                  LLD->getAlignment());
+          }
+          // Users of the select now use the result of the load.
+          CombineTo(TheSelect, Load);
+        
+          // Users of the old loads now use the new load's chain.  We know the
+          // old-load value is dead now.
+          CombineTo(LHS.Val, Load.getValue(0), Load.getValue(1));
+          CombineTo(RHS.Val, Load.getValue(0), Load.getValue(1));
+          return true;
+        }
+      }
+    }
+  }
+  
+  return false;
+}
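+
+// For illustration, the load-select fold above in schematic form (assuming
+// the loads share a chain and neither address depends on the condition):
+//   select C, (load P1), (load P2)  ->  load (select C, P1, P2)
+// so "X ? 10.0 : 123.0", once both FP constants have been dropped into the
+// constant pool, becomes one load from a selected constant-pool address.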
+
+SDOperand DAGCombiner::SimplifySelectCC(SDOperand N0, SDOperand N1, 
+                                        SDOperand N2, SDOperand N3,
+                                        ISD::CondCode CC, bool NotExtCompare) {
+  
+  MVT::ValueType VT = N2.getValueType();
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val);
+  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.Val);
+
+  // Determine if the condition we're dealing with is constant
+  SDOperand SCC = SimplifySetCC(TLI.getSetCCResultTy(), N0, N1, CC, false);
+  if (SCC.Val) AddToWorkList(SCC.Val);
+  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.Val);
+
+  // fold select_cc true, x, y -> x
+  if (SCCC && SCCC->getValue())
+    return N2;
+  // fold select_cc false, x, y -> y
+  if (SCCC && SCCC->getValue() == 0)
+    return N3;
+  
+  // Check to see if we can simplify the select into an fabs node
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+    // Allow either -0.0 or 0.0
+    if (CFP->getValue() == 0.0) {
+      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+          N2 == N3.getOperand(0))
+        return DAG.getNode(ISD::FABS, VT, N0);
+      
+      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+          N2.getOperand(0) == N3)
+        return DAG.getNode(ISD::FABS, VT, N3);
+    }
+  }
+  
+  // Check to see if we can perform the "gzip trick", transforming
+  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
+  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+      MVT::isInteger(N0.getValueType()) && 
+      MVT::isInteger(N2.getValueType()) && 
+      (N1C->isNullValue() ||                    // (a < 0) ? b : 0
+       (N1C->getValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
+    MVT::ValueType XType = N0.getValueType();
+    MVT::ValueType AType = N2.getValueType();
+    if (XType >= AType) {
+      // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a
+      // single-bit constant.
+      if (N2C && ((N2C->getValue() & (N2C->getValue()-1)) == 0)) {
+        unsigned ShCtV = Log2_64(N2C->getValue());
+        ShCtV = MVT::getSizeInBits(XType)-ShCtV-1;
+        SDOperand ShCt = DAG.getConstant(ShCtV, TLI.getShiftAmountTy());
+        SDOperand Shift = DAG.getNode(ISD::SRL, XType, N0, ShCt);
+        AddToWorkList(Shift.Val);
+        if (XType > AType) {
+          Shift = DAG.getNode(ISD::TRUNCATE, AType, Shift);
+          AddToWorkList(Shift.Val);
+        }
+        return DAG.getNode(ISD::AND, AType, Shift, N2);
+      }
+      SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+                                    DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                    TLI.getShiftAmountTy()));
+      AddToWorkList(Shift.Val);
+      if (XType > AType) {
+        Shift = DAG.getNode(ISD::TRUNCATE, AType, Shift);
+        AddToWorkList(Shift.Val);
+      }
+      return DAG.getNode(ISD::AND, AType, Shift, N2);
+    }
+  }
+  
+  // fold select C, 16, 0 -> shl C, 4
+  if (N2C && N3C && N3C->isNullValue() && isPowerOf2_64(N2C->getValue()) &&
+      TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) {
+    
+    // If the caller doesn't want us to simplify this into a zext of a compare,
+    // don't do it.
+    if (NotExtCompare && N2C->getValue() == 1)
+      return SDOperand();
+    
+    // Get a SetCC of the condition
+    // FIXME: Should probably make sure that setcc is legal if we ever have a
+    // target where it isn't.
+    SDOperand Temp, SCC;
+    // cast from setcc result type to select result type
+    if (AfterLegalize) {
+      SCC  = DAG.getSetCC(TLI.getSetCCResultTy(), N0, N1, CC);
+      if (N2.getValueType() < SCC.getValueType())
+        Temp = DAG.getZeroExtendInReg(SCC, N2.getValueType());
+      else
+        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getValueType(), SCC);
+    } else {
+      SCC  = DAG.getSetCC(MVT::i1, N0, N1, CC);
+      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getValueType(), SCC);
+    }
+    AddToWorkList(SCC.Val);
+    AddToWorkList(Temp.Val);
+    
+    if (N2C->getValue() == 1)
+      return Temp;
+    // shl setcc result by log2 n2c
+    return DAG.getNode(ISD::SHL, N2.getValueType(), Temp,
+                       DAG.getConstant(Log2_64(N2C->getValue()),
+                                       TLI.getShiftAmountTy()));
+  }
+    
+  // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal;
+  // otherwise, go ahead with the folds.
+  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getValue() == 1ULL)) {
+    MVT::ValueType XType = N0.getValueType();
+    if (TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultTy())) {
+      SDOperand Res = DAG.getSetCC(TLI.getSetCCResultTy(), N0, N1, CC);
+      if (Res.getValueType() != VT)
+        Res = DAG.getNode(ISD::ZERO_EXTEND, VT, Res);
+      return Res;
+    }
+    
+    // seteq X, 0 -> srl (ctlz X, log2(size(X)))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && 
+        TLI.isOperationLegal(ISD::CTLZ, XType)) {
+      SDOperand Ctlz = DAG.getNode(ISD::CTLZ, XType, N0);
+      return DAG.getNode(ISD::SRL, XType, Ctlz, 
+                         DAG.getConstant(Log2_32(MVT::getSizeInBits(XType)),
+                                         TLI.getShiftAmountTy()));
+    }
+    // setgt X, 0 -> srl (and (-X, ~X), size(X)-1)
+    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { 
+      SDOperand NegN0 = DAG.getNode(ISD::SUB, XType, DAG.getConstant(0, XType),
+                                    N0);
+      SDOperand NotN0 = DAG.getNode(ISD::XOR, XType, N0, 
+                                    DAG.getConstant(~0ULL, XType));
+      return DAG.getNode(ISD::SRL, XType, 
+                         DAG.getNode(ISD::AND, XType, NegN0, NotN0),
+                         DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                         TLI.getShiftAmountTy()));
+    }
+    // setgt X, -1 -> xor (srl (X, size(X)-1), 1)
+    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+      SDOperand Sign = DAG.getNode(ISD::SRL, XType, N0,
+                                   DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                   TLI.getShiftAmountTy()));
+      return DAG.getNode(ISD::XOR, XType, Sign, DAG.getConstant(1, XType));
+    }
+  }
+  
+  // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
+      N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
+      N2.getOperand(0) == N1 && MVT::isInteger(N0.getValueType())) {
+    MVT::ValueType XType = N0.getValueType();
+    SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+                                  DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                  TLI.getShiftAmountTy()));
+    SDOperand Add = DAG.getNode(ISD::ADD, XType, N0, Shift);
+    AddToWorkList(Shift.Val);
+    AddToWorkList(Add.Val);
+    return DAG.getNode(ISD::XOR, XType, Add, Shift);
+  }
+  // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
+      N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
+    if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
+      MVT::ValueType XType = N0.getValueType();
+      if (SubC->isNullValue() && MVT::isInteger(XType)) {
+        SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+                                    DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                      TLI.getShiftAmountTy()));
+        SDOperand Add = DAG.getNode(ISD::ADD, XType, N0, Shift);
+        AddToWorkList(Shift.Val);
+        AddToWorkList(Add.Val);
+        return DAG.getNode(ISD::XOR, XType, Add, Shift);
+      }
+    }
+  }
+  
+  return SDOperand();
+}
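+
+// A worked example of the integer-abs folds above, for a 32-bit X:
+//   Y      = X >> 31        (arithmetic shift: 0 if X >= 0, -1 if X < 0)
+//   abs(X) = (X + Y) ^ Y
+// e.g. X = -5: Y = -1, X + Y = -6, and (-6) ^ (-1) = 5.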
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDOperand DAGCombiner::SimplifySetCC(MVT::ValueType VT, SDOperand N0,
+                                     SDOperand N1, ISD::CondCode Cond,
+                                     bool foldBooleans) {
+  TargetLowering::DAGCombinerInfo 
+    DagCombineInfo(DAG, !AfterLegalize, false, this);
+  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildSDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDOperand S = TLI.BuildSDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildUDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDOperand S = TLI.BuildUDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
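+
+// For illustration, a sketch of the magic-number technique the two builders
+// above rely on (see the reference cited in their comments); for a signed
+// 32-bit divide by 3 the target hook can emit the equivalent of:
+//   q = (int)(((int64_t)0x55555556 * n) >> 32);  // MULHS by the magic number
+//   q += (unsigned)n >> 31;                      // correction for negative n
+// The nodes built by TLI.BuildSDIV/BuildUDIV are collected in Built so the
+// combiner revisits them.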
+
+/// FindBaseOffset - Return true if base is known not to alias with anything
+/// but itself.  Provides base object and offset as results.
+static bool FindBaseOffset(SDOperand Ptr, SDOperand &Base, int64_t &Offset) {
+  // Assume it is a primitive operation.
+  Base = Ptr; Offset = 0;
+  
+  // If it's adding a simple constant, then integrate the offset.
+  if (Base.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+      Base = Base.getOperand(0);
+      Offset += C->getValue();
+    }
+  }
+  
+  // If it's any of the following then it can't alias with anything but itself.
+  return isa<FrameIndexSDNode>(Base) ||
+         isa<ConstantPoolSDNode>(Base) ||
+         isa<GlobalAddressSDNode>(Base);
+}
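+
+// For example, (add FrameIndex#1, (Constant 8)) decomposes into
+// Base = FrameIndex#1 and Offset = 8, and returns true, since a frame slot
+// can only alias itself.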
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDOperand Ptr1, int64_t Size1,
+                          const Value *SrcValue1, int SrcValueOffset1,
+                          SDOperand Ptr2, int64_t Size2,
+                          const Value *SrcValue2, int SrcValueOffset2)
+{
+  // If they are the same then they must be aliases.
+  if (Ptr1 == Ptr2) return true;
+  
+  // Gather base node and offset information.
+  SDOperand Base1, Base2;
+  int64_t Offset1, Offset2;
+  bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
+  bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+  
+  // If they have the same base address, then...
+  if (Base1 == Base2) {
+    // Check to see if the addresses overlap.
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+  }
+  
+  // If both bases are known distinct objects, they can't alias.
+  if (KnownBase1 && KnownBase2) return false;
+
+  if (CombinerGlobalAA) {
+    // Use alias analysis information.
+    int Overlap1 = Size1 + SrcValueOffset1 + Offset1;
+    int Overlap2 = Size2 + SrcValueOffset2 + Offset2;
+    AliasAnalysis::AliasResult AAResult = 
+                             AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+    if (AAResult == AliasAnalysis::NoAlias)
+      return false;
+  }
+
+  // Otherwise we have to assume they alias.
+  return true;
+}
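+
+// A worked example of the same-base overlap test above, for accesses off one
+// frame index, i.e. intervals [Offset, Offset + Size):
+//   store i32 at FI+0  covers [0, 4)
+//   store i32 at FI+4  covers [4, 8)  -> disjoint from [0, 4): no alias
+//   store i64 at FI+0  covers [0, 8)  -> overlaps [4, 8): alias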
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node.  Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+                        SDOperand &Ptr, int64_t &Size,
+                        const Value *&SrcValue, int &SrcValueOffset) {
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    Ptr = LD->getBasePtr();
+    Size = MVT::getSizeInBits(LD->getLoadedVT()) >> 3;
+    SrcValue = LD->getSrcValue();
+    SrcValueOffset = LD->getSrcValueOffset();
+    return true;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    Ptr = ST->getBasePtr();
+    Size = MVT::getSizeInBits(ST->getStoredVT()) >> 3;
+    SrcValue = ST->getSrcValue();
+    SrcValueOffset = ST->getSrcValueOffset();
+  } else {
+    assert(0 && "FindAliasInfo expected a memory operand");
+  }
+  
+  return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDOperand OriginalChain,
+                                   SmallVector<SDOperand, 8> &Aliases) {
+  SmallVector<SDOperand, 8> Chains;     // List of chains to visit.
+  std::set<SDNode *> Visited;           // Visited node set.
+  
+  // Get alias information for node.
+  SDOperand Ptr;
+  int64_t Size;
+  const Value *SrcValue;
+  int SrcValueOffset;
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+
+  // Starting off.
+  Chains.push_back(OriginalChain);
+  
+  // Look at each chain and determine if it is an alias.  If so, add it to the
+  // aliases list.  If not, then continue up the chain looking for the next
+  // candidate.  
+  while (!Chains.empty()) {
+    SDOperand Chain = Chains.back();
+    Chains.pop_back();
+    
+    // Don't bother if we've been here before.
+    if (Visited.find(Chain.Val) != Visited.end()) continue;
+    Visited.insert(Chain.Val);
+  
+    switch (Chain.getOpcode()) {
+    case ISD::EntryToken:
+      // The entry token is the ideal chain operand, but it is handled in
+      // FindBetterChain.
+      break;
+      
+    case ISD::LOAD:
+    case ISD::STORE: {
+      // Get alias information for Chain.
+      SDOperand OpPtr;
+      int64_t OpSize;
+      const Value *OpSrcValue;
+      int OpSrcValueOffset;
+      bool IsOpLoad = FindAliasInfo(Chain.Val, OpPtr, OpSize,
+                                    OpSrcValue, OpSrcValueOffset);
+      
+      // If the chain is an alias, stop here.
+      if (!(IsLoad && IsOpLoad) &&
+          isAlias(Ptr, Size, SrcValue, SrcValueOffset,
+                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+        Aliases.push_back(Chain);
+      } else {
+        // Look further up the chain.
+        Chains.push_back(Chain.getOperand(0));      
+        // Clean up old chain.
+        AddToWorkList(Chain.Val);
+      }
+      break;
+    }
+    
+    case ISD::TokenFactor:
+      // We have to check each of the operands of the token factor, so we
+      // queue them up.  Adding the operands to the queue (stack) in reverse
+      // order maintains the original order and increases the likelihood that
+      // getNode will find a matching token factor (CSE).
+      for (unsigned n = Chain.getNumOperands(); n;)
+        Chains.push_back(Chain.getOperand(--n));
+      // Eliminate the token factor if we can.
+      AddToWorkList(Chain.Val);
+      break;
+      
+    default:
+      // For all other instructions we will just have to take what we can get.
+      Aliases.push_back(Chain);
+      break;
+    }
+  }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node).
+SDOperand DAGCombiner::FindBetterChain(SDNode *N, SDOperand OldChain) {
+  SmallVector<SDOperand, 8> Aliases;  // Ops for replacing token factor.
+  
+  // Accumulate all the aliases to this node.
+  GatherAllAliases(N, OldChain, Aliases);
+  
+  if (Aliases.size() == 0) {
+    // If no operands then chain to entry token.
+    return DAG.getEntryNode();
+  } else if (Aliases.size() == 1) {
+    // If a single operand then chain to it.  We don't need to revisit it.
+    return Aliases[0];
+  }
+
+  // Construct a custom tailored token factor.
+  SDOperand NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                                   &Aliases[0], Aliases.size());
+
+  // Make sure the old chain gets cleaned up.
+  if (NewChain != OldChain) AddToWorkList(OldChain.Val);
+  
+  return NewChain;
+}
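+
+// For illustration, given this chained sequence:
+//   store [FI#1]        ; chain C1
+//   store [FI#2] (C1)   ; chain C2
+//   load  [FI#1] (C2)
+// GatherAllAliases walks up from C2, skips the store to FI#2 (distinct known
+// bases cannot alias), and keeps the store to FI#1.  With a single alias the
+// load is re-chained directly to C1, exposing more scheduling freedom.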
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(bool RunningAfterLegalize, AliasAnalysis &AA) {
+  if (!RunningAfterLegalize && ViewDAGCombine1)
+    viewGraph();
+  if (RunningAfterLegalize && ViewDAGCombine2)
+    viewGraph();
+  // Run the combiner over the entire DAG.
+  DAGCombiner(*this, AA).Run(RunningAfterLegalize);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 0000000..5bcee35
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,5755 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+                 cl::desc("Pop up a window to show dags before legalize"));
+#else
+static const bool ViewLegalizeDAGs = 0;
+#endif
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it.  This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing.  For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge setcc and brc instructions into brcc's.
+///
+namespace {
+class VISIBILITY_HIDDEN SelectionDAGLegalize {
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+
+  // Libcall insertion helpers.
+  
+  /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+  /// most recently legalized.  We use this to ensure that calls are properly
+  /// serialized against each other, including inserted libcalls.
+  SDOperand LastCALLSEQ_END;
+  
+  /// IsLegalizingCall - This member is used *only* for purposes of providing
+  /// helpful assertions that a libcall isn't created while another call is 
+  /// being legalized (which could lead to non-serialized call sequences).
+  bool IsLegalizingCall;
+  
+  enum LegalizeAction {
+    Legal,      // The target natively supports this operation.
+    Promote,    // This operation should be executed in a larger type.
+    Expand      // Try to expand this to other ops, otherwise use a libcall.
+  };
+  
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// value type, where the two bits correspond to the LegalizeAction enum.
+  /// This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// LegalizedNodes - For nodes that are of legal width, and that have more
+  /// than one use, this map indicates what regularized operand to use.  This
+  /// allows us to avoid legalizing the same thing more than once.
+  DenseMap<SDOperand, SDOperand> LegalizedNodes;
+
+  /// PromotedNodes - For nodes that are below legal width, and that have more
+  /// than one use, this map indicates what promoted value to use.  This allows
+  /// us to avoid promoting the same thing more than once.
+  DenseMap<SDOperand, SDOperand> PromotedNodes;
+
+  /// ExpandedNodes - For nodes that need to be expanded this map indicates
+  /// which operands are the expanded version of the input.  This allows
+  /// us to avoid expanding the same node more than once.
+  DenseMap<SDOperand, std::pair<SDOperand, SDOperand> > ExpandedNodes;
+
+  /// SplitNodes - For vector nodes that need to be split, this map indicates
+  /// which operands are the split version of the input.  This allows us
+  /// to avoid splitting the same node more than once.
+  std::map<SDOperand, std::pair<SDOperand, SDOperand> > SplitNodes;
+  
+  /// ScalarizedNodes - For nodes that need to be converted from vector types
+  /// to scalar types, this contains the mapping from the ones we have already
+  /// processed to the result.
+  std::map<SDOperand, SDOperand> ScalarizedNodes;
+  
+  void AddLegalizedOperand(SDOperand From, SDOperand To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+  void AddPromotedOperand(SDOperand From, SDOperand To) {
+    bool isNew = PromotedNodes.insert(std::make_pair(From, To));
+    assert(isNew && "Got into the map somehow?");
+    // If someone requests legalization of the new node, return itself.
+    LegalizedNodes.insert(std::make_pair(To, To));
+  }
+
+public:
+
+  SelectionDAGLegalize(SelectionDAG &DAG);
+
+  /// getTypeAction - Return how we should legalize values of this type, either
+  /// it is already legal or we need to expand it into multiple registers of
+  /// smaller integer type, or we need to promote it to a larger type.
+  LegalizeAction getTypeAction(MVT::ValueType VT) const {
+    return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+  }
+
+  /// isTypeLegal - Return true if this type is legal on this target.
+  ///
+  bool isTypeLegal(MVT::ValueType VT) const {
+    return getTypeAction(VT) == Legal;
+  }
+
+  void LegalizeDAG();
+
+private:
+  /// HandleOp - Legalize, Promote, or Expand the specified operand as
+  /// appropriate for its type.
+  void HandleOp(SDOperand Op);
+    
+  /// LegalizeOp - We know that the specified value has a legal type.
+  /// Recursively ensure that the operands have legal types, then return the
+  /// result.
+  SDOperand LegalizeOp(SDOperand O);
+  
+  /// PromoteOp - Given an operation that produces a value in an invalid type,
+  /// promote it to compute the value into a larger type.  The produced value
+  /// will have the correct bits for the low portion of the register, but no
+  /// guarantee is made about the top bits: it may be zero, sign-extended, or
+  /// garbage.
+  SDOperand PromoteOp(SDOperand O);
+
+  /// ExpandOp - Expand the specified SDOperand into its two component pieces
+  /// Lo & Hi.  Note that the Op MUST be of an expanded type.  As a result of
+  /// this, the LegalizedNodes map is filled in for any results that are not
+  /// expanded, the ExpandedNodes map is filled in for any results that are
+  /// expanded, and the Lo/Hi values are returned.  This applies to integer
+  /// and vector types.
+  void ExpandOp(SDOperand O, SDOperand &Lo, SDOperand &Hi);
+
+  /// SplitVectorOp - Given an operand of vector type, break it down into
+  /// two smaller values.
+  void SplitVectorOp(SDOperand O, SDOperand &Lo, SDOperand &Hi);
+  
+  /// ScalarizeVectorOp - Given an operand of single-element vector type
+  /// (e.g. v1f32), convert it into the equivalent operation that returns a
+  /// scalar (e.g. f32) value.
+  SDOperand ScalarizeVectorOp(SDOperand O);
+  
+  /// isShuffleLegal - Return true if a vector shuffle is legal with the
+  /// specified mask and type.  Targets can specify exactly which masks they
+  /// support and the code generator is tasked with not creating illegal masks.
+  ///
+  /// Note that this will also return true for shuffles that are promoted to a
+  /// different type.
+  ///
+  /// If this is a legal shuffle, this method returns the (possibly promoted)
+  /// build_vector Mask.  If it's not a legal shuffle, it returns null.
+  SDNode *isShuffleLegal(MVT::ValueType VT, SDOperand Mask) const;
+  
+  bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                    SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+  void LegalizeSetCCOperands(SDOperand &LHS, SDOperand &RHS, SDOperand &CC);
+    
+  SDOperand CreateStackTemporary(MVT::ValueType VT);
+
+  SDOperand ExpandLibCall(const char *Name, SDNode *Node, bool isSigned,
+                          SDOperand &Hi);
+  SDOperand ExpandIntToFP(bool isSigned, MVT::ValueType DestTy,
+                          SDOperand Source);
+
+  SDOperand ExpandBIT_CONVERT(MVT::ValueType DestVT, SDOperand SrcOp);
+  SDOperand ExpandBUILD_VECTOR(SDNode *Node);
+  SDOperand ExpandSCALAR_TO_VECTOR(SDNode *Node);
+  SDOperand ExpandLegalINT_TO_FP(bool isSigned,
+                                 SDOperand LegalOp,
+                                 MVT::ValueType DestVT);
+  SDOperand PromoteLegalINT_TO_FP(SDOperand LegalOp, MVT::ValueType DestVT,
+                                  bool isSigned);
+  SDOperand PromoteLegalFP_TO_INT(SDOperand LegalOp, MVT::ValueType DestVT,
+                                  bool isSigned);
+
+  SDOperand ExpandBSWAP(SDOperand Op);
+  SDOperand ExpandBitCount(unsigned Opc, SDOperand Op);
+  bool ExpandShift(unsigned Opc, SDOperand Op, SDOperand Amt,
+                   SDOperand &Lo, SDOperand &Hi);
+  void ExpandShiftParts(unsigned NodeOp, SDOperand Op, SDOperand Amt,
+                        SDOperand &Lo, SDOperand &Hi);
+
+  SDOperand ExpandEXTRACT_SUBVECTOR(SDOperand Op);
+  SDOperand ExpandEXTRACT_VECTOR_ELT(SDOperand Op);
+  
+  SDOperand getIntPtrConstant(uint64_t Val) {
+    return DAG.getConstant(Val, TLI.getPointerTy());
+  }
+};
+}
+
+/// isShuffleLegal - Return true if a vector shuffle is legal with the
+/// specified mask and type.  Targets can specify exactly which masks they
+/// support and the code generator is tasked with not creating illegal masks.
+///
+/// Note that this will also return true for shuffles that are promoted to a
+/// different type.
+SDNode *SelectionDAGLegalize::isShuffleLegal(MVT::ValueType VT, 
+                                             SDOperand Mask) const {
+  switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) {
+  default: return 0;
+  case TargetLowering::Legal:
+  case TargetLowering::Custom:
+    break;
+  case TargetLowering::Promote: {
+    // If this is promoted to a different type, convert the shuffle mask and
+    // ask if it is legal in the promoted type!
+    MVT::ValueType NVT = TLI.getTypeToPromoteTo(ISD::VECTOR_SHUFFLE, VT);
+
+    // If we changed # elements, change the shuffle mask.
+    unsigned NumEltsGrowth =
+      MVT::getVectorNumElements(NVT) / MVT::getVectorNumElements(VT);
+    assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+    if (NumEltsGrowth > 1) {
+      // Renumber the elements.
+      SmallVector<SDOperand, 8> Ops;
+      for (unsigned i = 0, e = Mask.getNumOperands(); i != e; ++i) {
+        SDOperand InOp = Mask.getOperand(i);
+        for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+          if (InOp.getOpcode() == ISD::UNDEF)
+            Ops.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+          else {
+            unsigned InEltNo = cast<ConstantSDNode>(InOp)->getValue();
+            Ops.push_back(DAG.getConstant(InEltNo*NumEltsGrowth+j, MVT::i32));
+          }
+        }
+      }
+      Mask = DAG.getNode(ISD::BUILD_VECTOR, NVT, &Ops[0], Ops.size());
+    }
+    VT = NVT;
+    break;
+  }
+  }
+  return TLI.isShuffleMaskLegal(Mask, VT) ? Mask.Val : 0;
+}
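+
+// A worked example of the mask renumbering above: on a target that promotes
+// v2i64 shuffles to v4i32 (NumEltsGrowth = 2), the v2i64 mask <1, 0> becomes
+// the v4i32 mask <2, 3, 0, 1>; element i expands to i*2+0 and i*2+1, and an
+// undef mask element expands to undefs.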
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
+  : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+    ValueTypeActions(TLI.getValueTypeActions()) {
+  assert(MVT::LAST_VALUETYPE <= 32 &&
+         "Too many value types for ValueTypeActions to hold!");
+}
+
+/// ComputeTopDownOrdering - Compute a top-down ordering of the dag, where Order
+/// contains all of a node's operands before it contains the node.
+static void ComputeTopDownOrdering(SelectionDAG &DAG,
+                                   SmallVector<SDNode*, 64> &Order) {
+
+  DenseMap<SDNode*, unsigned> Visited;
+  std::vector<SDNode*> Worklist;
+  Worklist.reserve(128);
+  
+  // Compute ordering from all of the leaves in the graph, those (like the
+  // entry node) that have no operands.
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    if (I->getNumOperands() == 0) {
+      Visited[I] = 0 - 1U;  // So the first ++ yields 0, their operand count.
+      Worklist.push_back(I);
+    }
+  }
+  
+  while (!Worklist.empty()) {
+    SDNode *N = Worklist.back();
+    Worklist.pop_back();
+    
+    if (++Visited[N] != N->getNumOperands())
+      continue;  // Haven't visited all operands yet
+    
+    Order.push_back(N);
+
+    // Now that N is in the order, push its users; each user is emitted once
+    // all of its operands have been visited.
+    for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+         UI != E; ++UI)
+      Worklist.push_back(*UI);
+  }
+
+  assert(Order.size() == Visited.size() &&
+         Order.size() == 
+         (unsigned)std::distance(DAG.allnodes_begin(), DAG.allnodes_end()) &&
+         "Error: DAG is cyclic!");
+}
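+
+// For illustration: in a DAG where the entry token feeds two loads that feed
+// an add, the order comes out as entry, load, load, add.  A node is appended
+// only once the Visited counter reaches its operand count, i.e. once it has
+// been reached through every one of its operands.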
+
+
+void SelectionDAGLegalize::LegalizeDAG() {
+  LastCALLSEQ_END = DAG.getEntryNode();
+  IsLegalizingCall = false;
+  
+  // The legalize process is inherently a bottom-up recursive process (users
+  // legalize their uses before themselves).  Given infinite stack space, we
+  // could just start legalizing on the root and traverse the whole graph.  In
+  // practice however, this causes us to run out of stack space on large basic
+  // blocks.  To avoid this problem, compute an ordering of the nodes where each
+  // node is only legalized after all of its operands are legalized.
+  SmallVector<SDNode*, 64> Order;
+  ComputeTopDownOrdering(DAG, Order);
+  
+  for (unsigned i = 0, e = Order.size(); i != e; ++i)
+    HandleOp(SDOperand(Order[i], 0));
+
+  // Finally, it's possible the root changed.  Get the new root.
+  SDOperand OldRoot = DAG.getRoot();
+  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+  DAG.setRoot(LegalizedNodes[OldRoot]);
+
+  ExpandedNodes.clear();
+  LegalizedNodes.clear();
+  PromotedNodes.clear();
+  SplitNodes.clear();
+  ScalarizedNodes.clear();
+
+  // Remove dead nodes now.
+  DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node) {
+  if (Node->getOpcode() == ISD::CALLSEQ_END)
+    return Node;
+  if (Node->use_empty())
+    return 0;   // No CallSeqEnd
+  
+  // The chain is usually at the end.
+  SDOperand TheChain(Node, Node->getNumValues()-1);
+  if (TheChain.getValueType() != MVT::Other) {
+    // Sometimes it's at the beginning.
+    TheChain = SDOperand(Node, 0);
+    if (TheChain.getValueType() != MVT::Other) {
+      // Otherwise, hunt for it.
+      for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+        if (Node->getValueType(i) == MVT::Other) {
+          TheChain = SDOperand(Node, i);
+          break;
+        }
+          
+      // Otherwise, we walked into a node without a chain.  
+      if (TheChain.getValueType() != MVT::Other)
+        return 0;
+    }
+  }
+  
+  for (SDNode::use_iterator UI = Node->use_begin(),
+       E = Node->use_end(); UI != E; ++UI) {
+    
+    // Make sure to only follow users of our token chain.
+    SDNode *User = *UI;
+    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+      if (User->getOperand(i) == TheChain)
+        if (SDNode *Result = FindCallEndFromCallStart(User))
+          return Result;
+  }
+  return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call 
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+  assert(Node && "Didn't find callseq_start for a call??");
+  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+  
+  assert(Node->getOperand(0).getValueType() == MVT::Other &&
+         "Node doesn't have a token chain argument!");
+  return FindCallStartFromCallEnd(Node->getOperand(0).Val);
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the operands of N, looking
+/// to see if any of them can reach Dest.  If none of them do, legalize them,
+/// legalize N itself, and return false; otherwise, return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo.  This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                     SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+  if (N == Dest) return true;  // N certainly leads to Dest :)
+  
+  // If we've already processed this node and it does lead to Dest, there is no
+  // need to reprocess it.
+  if (NodesLeadingTo.count(N)) return true;
+  
+  // If the first result of this node has already been legalized, then it
+  // cannot reach Dest.
+  switch (getTypeAction(N->getValueType(0))) {
+  case Legal: 
+    if (LegalizedNodes.count(SDOperand(N, 0))) return false;
+    break;
+  case Promote:
+    if (PromotedNodes.count(SDOperand(N, 0))) return false;
+    break;
+  case Expand:
+    if (ExpandedNodes.count(SDOperand(N, 0))) return false;
+    break;
+  }
+  
+  // Okay, this node has not already been legalized.  Check and legalize all
+  // operands.  If none lead to Dest, then we can legalize this node.
+  bool OperandsLeadToDest = false;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    OperandsLeadToDest |=     // If an operand leads to Dest, so do we.
+      LegalizeAllNodesNotLeadingTo(N->getOperand(i).Val, Dest, NodesLeadingTo);
+
+  if (OperandsLeadToDest) {
+    NodesLeadingTo.insert(N);
+    return true;
+  }
+
+  // Okay, this node looks safe, legalize it and return false.
+  HandleOp(SDOperand(N, 0));
+  return false;
+}
+
+/// HandleOp - Legalize, Promote, or Expand the specified operand as
+/// appropriate for its type.
+void SelectionDAGLegalize::HandleOp(SDOperand Op) {
+  MVT::ValueType VT = Op.getValueType();
+  switch (getTypeAction(VT)) {
+  default: assert(0 && "Bad type action!");
+  case Legal:   (void)LegalizeOp(Op); break;
+  case Promote: (void)PromoteOp(Op); break;
+  case Expand:
+    if (!MVT::isVector(VT)) {
+      // If this is an illegal scalar, expand it into its two component
+      // pieces.
+      SDOperand X, Y;
+      ExpandOp(Op, X, Y);
+    } else if (MVT::getVectorNumElements(VT) == 1) {
+      // If this is an illegal single element vector, convert it to a
+      // scalar operation.
+      (void)ScalarizeVectorOp(Op);
+    } else {
+      // Otherwise, this is an illegal multiple element vector.
+      // Split it in half and legalize both parts.
+      SDOperand X, Y;
+      SplitVectorOp(Op, X, Y);
+    }
+    break;
+  }
+}
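+
+// For illustration (on a hypothetical 32-bit target), the dispatch above
+// might act as follows:
+//   i32   -> Legal:   LegalizeOp
+//   i16   -> Promote: PromoteOp computes the value in i32
+//   i64   -> Expand:  ExpandOp splits it into two i32 halves
+//   v1f32 -> Expand:  ScalarizeVectorOp turns it into an f32 operation
+//   v8f32 -> Expand:  SplitVectorOp splits it into two v4f32 halves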
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDOperand ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+                                  SelectionDAG &DAG, TargetLowering &TLI) {
+  bool Extend = false;
+
+  // If an FP immediate is precise when represented as a float and if the
+  // target can do an extending load from float to double, we put it into
+  // the constant pool as a float, even if it is statically typed as a
+  // double.
+  MVT::ValueType VT = CFP->getValueType(0);
+  bool isDouble = VT == MVT::f64;
+  ConstantFP *LLVMC = ConstantFP::get(isDouble ? Type::DoubleTy :
+                                      Type::FloatTy, CFP->getValue());
+  if (!UseCP) {
+    double Val = LLVMC->getValue();
+    return isDouble
+      ? DAG.getConstant(DoubleToBits(Val), MVT::i64)
+      : DAG.getConstant(FloatToBits(Val), MVT::i32);
+  }
+
+  if (isDouble && CFP->isExactlyValue((float)CFP->getValue()) &&
+      // Only do this if the target has a native EXTLOAD instruction from f32.
+      TLI.isLoadXLegal(ISD::EXTLOAD, MVT::f32)) {
+    LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC,Type::FloatTy));
+    VT = MVT::f32;
+    Extend = true;
+  }
+
+  SDOperand CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+  if (Extend) {
+    return DAG.getExtLoad(ISD::EXTLOAD, MVT::f64, DAG.getEntryNode(),
+                          CPIdx, NULL, 0, MVT::f32);
+  } else {
+    return DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+  }
+}
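+
+// For illustration: a double constant such as 1.5 is exact as a float, so on
+// a target with a legal f32->f64 EXTLOAD it becomes a 4-byte constant-pool
+// entry extended on load, while a constant like 0.1, which is inexact in
+// f32, stays in the pool as a full 8-byte double.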
+
+
+/// ExpandFCOPYSIGNToBitwiseOps - Expands fcopysign to a series of bitwise
+/// operations.
+static
+SDOperand ExpandFCOPYSIGNToBitwiseOps(SDNode *Node, MVT::ValueType NVT,
+                                      SelectionDAG &DAG, TargetLowering &TLI) {
+  MVT::ValueType VT = Node->getValueType(0);
+  MVT::ValueType SrcVT = Node->getOperand(1).getValueType();
+  assert((SrcVT == MVT::f32 || SrcVT == MVT::f64) &&
+         "fcopysign expansion only supported for f32 and f64");
+  MVT::ValueType SrcNVT = (SrcVT == MVT::f64) ? MVT::i64 : MVT::i32;
+
+  // First get the sign bit of second operand.
+  SDOperand Mask1 = (SrcVT == MVT::f64)
+    ? DAG.getConstantFP(BitsToDouble(1ULL << 63), SrcVT)
+    : DAG.getConstantFP(BitsToFloat(1U << 31), SrcVT);
+  Mask1 = DAG.getNode(ISD::BIT_CONVERT, SrcNVT, Mask1);
+  SDOperand SignBit= DAG.getNode(ISD::BIT_CONVERT, SrcNVT, Node->getOperand(1));
+  SignBit = DAG.getNode(ISD::AND, SrcNVT, SignBit, Mask1);
+  // Shift right or sign-extend it if the two operands have different types.
+  int SizeDiff = MVT::getSizeInBits(SrcNVT) - MVT::getSizeInBits(NVT);
+  if (SizeDiff > 0) {
+    SignBit = DAG.getNode(ISD::SRL, SrcNVT, SignBit,
+                          DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+    SignBit = DAG.getNode(ISD::TRUNCATE, NVT, SignBit);
+  } else if (SizeDiff < 0)
+    SignBit = DAG.getNode(ISD::SIGN_EXTEND, NVT, SignBit);
+
+  // Clear the sign bit of first operand.
+  SDOperand Mask2 = (VT == MVT::f64)
+    ? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), VT)
+    : DAG.getConstantFP(BitsToFloat(~(1U << 31)), VT);
+  Mask2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask2);
+  SDOperand Result = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+  Result = DAG.getNode(ISD::AND, NVT, Result, Mask2);
+
+  // Or the value with the sign bit.
+  Result = DAG.getNode(ISD::OR, NVT, Result, SignBit);
+  return Result;
+}
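+
+// A worked example of the expansion above for f32 operands:
+//   copysign(x, y) = (bits(x) & 0x7fffffff) | (bits(y) & 0x80000000)
+// e.g. copysign(1.0f, -2.0f): bits(1.0f) = 0x3f800000, the sign bit of -2.0f
+// is 0x80000000, so the result bits are 0xbf800000, i.e. -1.0f.  The shifts
+// above only move the sign bit when the two operand types differ.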
+
+
+/// LegalizeOp - We know that the specified value has a legal type, and
+/// that its operands are legal.  Now ensure that the operation itself
+/// is legal, recursively ensuring that the operands' operations remain
+/// legal.
+SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
+  assert(isTypeLegal(Op.getValueType()) &&
+         "Caller should expand or promote operands that are not legal!");
+  SDNode *Node = Op.Val;
+
+  // If this operation defines any values that cannot be represented in a
+  // register on this target, make sure to expand or promote them.
+  if (Node->getNumValues() > 1) {
+    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+      if (getTypeAction(Node->getValueType(i)) != Legal) {
+        HandleOp(Op.getValue(i));
+        assert(LegalizedNodes.count(Op) &&
+               "Handling didn't add legal operands!");
+        return LegalizedNodes[Op];
+      }
+  }
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  DenseMap<SDOperand, SDOperand>::iterator I = LegalizedNodes.find(Op);
+  if (I != LegalizedNodes.end()) return I->second;
+
+  SDOperand Tmp1, Tmp2, Tmp3, Tmp4;
+  SDOperand Result = Op;
+  bool isCustom = false;
+  
+  switch (Node->getOpcode()) {
+  case ISD::FrameIndex:
+  case ISD::EntryToken:
+  case ISD::Register:
+  case ISD::BasicBlock:
+  case ISD::TargetFrameIndex:
+  case ISD::TargetJumpTable:
+  case ISD::TargetConstant:
+  case ISD::TargetConstantFP:
+  case ISD::TargetConstantPool:
+  case ISD::TargetGlobalAddress:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::TargetExternalSymbol:
+  case ISD::VALUETYPE:
+  case ISD::SRCVALUE:
+  case ISD::STRING:
+  case ISD::CONDCODE:
+    // Primitives must all be legal.
+    assert(TLI.isOperationLegal(Node->getOpcode(), Node->getValueType(0)) &&
+           "This must be legal!");
+    break;
+  default:
+    if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+      // If this is a target node, legalize it by legalizing the operands then
+      // passing it through.
+      SmallVector<SDOperand, 8> Ops;
+      for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+        Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+      Result = DAG.UpdateNodeOperands(Result.getValue(0), &Ops[0], Ops.size());
+
+      for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+        AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+      return Result.getValue(Op.ResNo);
+    }
+    // Otherwise this is an unhandled builtin node.  Print it and abort.
+#ifndef NDEBUG
+    cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to legalize this operator!");
+    abort();
+  case ISD::GLOBAL_OFFSET_TABLE:
+  case ISD::GlobalAddress:
+  case ISD::GlobalTLSAddress:
+  case ISD::ExternalSymbol:
+  case ISD::ConstantPool:
+  case ISD::JumpTable: // Nothing to do.
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Op, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      // FALLTHROUGH if the target doesn't want to lower this op after all.
+    case TargetLowering::Legal:
+      break;
+    }
+    break;
+  case ISD::FRAMEADDR:
+  case ISD::RETURNADDR:
+  case ISD::FRAME_TO_ARGS_OFFSET:
+    // The only option for these nodes is to custom lower them.  If the target
+    // does not custom lower them, then return zero.
+    Tmp1 = TLI.LowerOperation(Op, DAG);
+    if (Tmp1.Val) 
+      Result = Tmp1;
+    else
+      Result = DAG.getConstant(0, TLI.getPointerTy());
+    break;
+  case ISD::EXCEPTIONADDR: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    MVT::ValueType VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
+        unsigned Reg = TLI.getExceptionAddressRegister();
+        Result = DAG.getCopyFromReg(Tmp1, Reg, VT).getValue(Op.ResNo);
+      }
+      break;
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) break;
+      // Fall Thru
+    case TargetLowering::Legal: {
+      SDOperand Ops[] = { DAG.getConstant(0, VT), Tmp1 };
+      Result = DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+                           Ops, 2).getValue(Op.ResNo);
+      break;
+    }
+    }
+    }
+    break;
+  case ISD::EHSELECTION: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    MVT::ValueType VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
+        unsigned Reg = TLI.getExceptionSelectorRegister();
+        Result = DAG.getCopyFromReg(Tmp2, Reg, VT).getValue(Op.ResNo);
+      }
+      break;
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) break;
+      // Fall Thru
+    case TargetLowering::Legal: {
+      SDOperand Ops[] = { DAG.getConstant(0, VT), Tmp2 };
+      Result = DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+                           Ops, 2).getValue(Op.ResNo);
+      break;
+    }
+    }
+    }
+    break;
+  case ISD::EH_RETURN: {
+    MVT::ValueType VT = Node->getValueType(0);
+    // The only "good" option for this node is to custom lower it.
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported at all!");
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) break;
+      // Fall Thru
+    case TargetLowering::Legal:
+      // The target does not know how to lower this; lower it to a no-op.
+      Result = LegalizeOp(Node->getOperand(0));
+      break;
+    }
+    }
+    break;
+  case ISD::AssertSext:
+  case ISD::AssertZext:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::MERGE_VALUES:
+    // Legalize eliminates MERGE_VALUES nodes.
+    Result = Node->getOperand(Op.ResNo);
+    break;
+  case ISD::CopyFromReg:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Result = Op.getValue(0);
+    if (Node->getNumValues() == 2) {
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    } else {
+      assert(Node->getNumValues() == 3 && "Invalid copyfromreg!");
+      if (Node->getNumOperands() == 3) {
+        Tmp2 = LegalizeOp(Node->getOperand(2));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1),Tmp2);
+      } else {
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+      }
+      AddLegalizedOperand(Op.getValue(2), Result.getValue(2));
+    }
+    // Since CopyFromReg produces two values, make sure to remember that we
+    // legalized both of them.
+    AddLegalizedOperand(Op.getValue(0), Result);
+    AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+    return Result.getValue(Op.ResNo);
+  case ISD::UNDEF: {
+    MVT::ValueType VT = Op.getValueType();
+    switch (TLI.getOperationAction(ISD::UNDEF, VT)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand:
+      if (MVT::isInteger(VT))
+        Result = DAG.getConstant(0, VT);
+      else if (MVT::isFloatingPoint(VT))
+        Result = DAG.getConstantFP(0, VT);
+      else
+        assert(0 && "Unknown value type!");
+      break;
+    case TargetLowering::Legal:
+      break;
+    }
+    break;
+  }
+    
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID: {
+    SmallVector<SDOperand, 8> Ops;
+    for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+      Ops.push_back(LegalizeOp(Node->getOperand(i)));
+    Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+    
+    // Allow the target to custom lower its intrinsics if it wants to.
+    if (TLI.getOperationAction(Node->getOpcode(), MVT::Other) == 
+        TargetLowering::Custom) {
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) Result = Tmp3;
+    }
+
+    if (Result.Val->getNumValues() == 1) break;
+
+    // Must have return value and chain result.
+    assert(Result.Val->getNumValues() == 2 &&
+           "Cannot return more than two values!");
+
+    // Since this node produces two values, make sure to remember that we
+    // legalized both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.ResNo);
+  }    
+
+  case ISD::LOCATION:
+    assert(Node->getNumOperands() == 5 && "Invalid LOCATION node!");
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the input chain.
+    
+    switch (TLI.getOperationAction(ISD::LOCATION, MVT::Other)) {
+    case TargetLowering::Promote:
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
+      MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+      bool useDEBUG_LOC = TLI.isOperationLegal(ISD::DEBUG_LOC, MVT::Other);
+      bool useLABEL = TLI.isOperationLegal(ISD::LABEL, MVT::Other);
+      
+      if (MMI && (useDEBUG_LOC || useLABEL)) {
+        const std::string &FName =
+          cast<StringSDNode>(Node->getOperand(3))->getValue();
+        const std::string &DirName = 
+          cast<StringSDNode>(Node->getOperand(4))->getValue();
+        unsigned SrcFile = MMI->RecordSource(DirName, FName);
+
+        SmallVector<SDOperand, 8> Ops;
+        Ops.push_back(Tmp1);  // chain
+        SDOperand LineOp = Node->getOperand(1);
+        SDOperand ColOp = Node->getOperand(2);
+        
+        if (useDEBUG_LOC) {
+          Ops.push_back(LineOp);  // line #
+          Ops.push_back(ColOp);  // col #
+          Ops.push_back(DAG.getConstant(SrcFile, MVT::i32));  // source file id
+          Result = DAG.getNode(ISD::DEBUG_LOC, MVT::Other, &Ops[0], Ops.size());
+        } else {
+          unsigned Line = cast<ConstantSDNode>(LineOp)->getValue();
+          unsigned Col = cast<ConstantSDNode>(ColOp)->getValue();
+          unsigned ID = MMI->RecordLabel(Line, Col, SrcFile);
+          Ops.push_back(DAG.getConstant(ID, MVT::i32));
+          Result = DAG.getNode(ISD::LABEL, MVT::Other, &Ops[0], Ops.size());
+        }
+      } else {
+        Result = Tmp1;  // chain
+      }
+      break;
+    }
+    case TargetLowering::Legal:
+      if (Tmp1 != Node->getOperand(0) ||
+          getTypeAction(Node->getOperand(1).getValueType()) == Promote) {
+        SmallVector<SDOperand, 8> Ops;
+        Ops.push_back(Tmp1);
+        if (getTypeAction(Node->getOperand(1).getValueType()) == Legal) {
+          Ops.push_back(Node->getOperand(1));  // line # must be legal.
+          Ops.push_back(Node->getOperand(2));  // col # must be legal.
+        } else {
+          // Otherwise promote them.
+          Ops.push_back(PromoteOp(Node->getOperand(1)));
+          Ops.push_back(PromoteOp(Node->getOperand(2)));
+        }
+        Ops.push_back(Node->getOperand(3));  // filename must be legal.
+        Ops.push_back(Node->getOperand(4));  // working dir # must be legal.
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+      break;
+    }
+    break;
+    
+  case ISD::DEBUG_LOC:
+    assert(Node->getNumOperands() == 4 && "Invalid DEBUG_LOC node!");
+    switch (TLI.getOperationAction(ISD::DEBUG_LOC, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+      Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the line #.
+      Tmp3 = LegalizeOp(Node->getOperand(2));  // Legalize the col #.
+      Tmp4 = LegalizeOp(Node->getOperand(3));  // Legalize the source file id.
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4);
+      break;
+    }
+    break;    
+
+  case ISD::LABEL:
+    assert(Node->getNumOperands() == 2 && "Invalid LABEL node!");
+    switch (TLI.getOperationAction(ISD::LABEL, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+      Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the label id.
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+      break;
+    case TargetLowering::Expand:
+      Result = LegalizeOp(Node->getOperand(0));
+      break;
+    }
+    break;
+
+  case ISD::Constant:
+    // We know we don't need to expand constants here; constants only have one
+    // value, and we checked above that it is fine.
+
+    // FIXME: Maybe we should handle things like targets that don't support full
+    // 32-bit immediates?
+    break;
+  case ISD::ConstantFP: {
+    // Spill FP immediates to the constant pool if the target cannot directly
+    // codegen them.  Targets often have some immediate values that can be
+    // efficiently generated into an FP register without a load.  We explicitly
+    // leave these constants as ConstantFP nodes for the target to deal with.
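+    // E.g. (illustrative) on a target where only 0.0 and 1.0 are legal FP
+    // immediates, a constant like 3.14 would be spilled to the constant pool
+    // and materialized with a load.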
+    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+
+    // Check to see if this FP immediate is already legal.
+    bool isLegal = false;
+    for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
+           E = TLI.legal_fpimm_end(); I != E; ++I)
+      if (CFP->isExactlyValue(*I)) {
+        isLegal = true;
+        break;
+      }
+
+    // If this is a legal constant, turn it into a TargetConstantFP node.
+    if (isLegal) {
+      Result = DAG.getTargetConstantFP(CFP->getValue(), CFP->getValueType(0));
+      break;
+    }
+
+    switch (TLI.getOperationAction(ISD::ConstantFP, CFP->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand:
+      Result = ExpandConstantFP(CFP, true, DAG, TLI);
+    }
+    break;
+  }
+  case ISD::TokenFactor:
+    if (Node->getNumOperands() == 2) {
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+      Tmp2 = LegalizeOp(Node->getOperand(1));
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    } else if (Node->getNumOperands() == 3) {
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+      Tmp2 = LegalizeOp(Node->getOperand(1));
+      Tmp3 = LegalizeOp(Node->getOperand(2));
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+    } else {
+      SmallVector<SDOperand, 8> Ops;
+      // Legalize the operands.
+      for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+        Ops.push_back(LegalizeOp(Node->getOperand(i)));
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+    }
+    break;
+    
+  case ISD::FORMAL_ARGUMENTS:
+  case ISD::CALL:
+    // The only option for this is to custom lower it.
+    Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG);
+    assert(Tmp3.Val && "Target didn't custom lower this node!");
+    assert(Tmp3.Val->getNumValues() == Result.Val->getNumValues() &&
+           "Lowering call/formal_arguments produced unexpected # results!");
+    
+    // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to
+    // remember that we legalized all of them, so it doesn't get relegalized.
+    for (unsigned i = 0, e = Tmp3.Val->getNumValues(); i != e; ++i) {
+      Tmp1 = LegalizeOp(Tmp3.getValue(i));
+      if (Op.ResNo == i)
+        Tmp2 = Tmp1;
+      AddLegalizedOperand(SDOperand(Node, i), Tmp1);
+    }
+    return Tmp2;
+        
+  case ISD::BUILD_VECTOR:
+    switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand:
+      Result = ExpandBUILD_VECTOR(Result.Val);
+      break;
+    }
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // InVec
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // InVal
+    Tmp3 = LegalizeOp(Node->getOperand(2));  // InEltNo
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+    
+    switch (TLI.getOperationAction(ISD::INSERT_VECTOR_ELT,
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand: {
+      // If the insert index is a constant, codegen this as a scalar_to_vector,
+      // then a shuffle that inserts it into the right position in the vector.
+      if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Tmp3)) {
+        SDOperand ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, 
+                                      Tmp1.getValueType(), Tmp2);
+        
+        unsigned NumElts = MVT::getVectorNumElements(Tmp1.getValueType());
+        MVT::ValueType ShufMaskVT = MVT::getIntVectorWithNumElements(NumElts);
+        MVT::ValueType ShufMaskEltVT = MVT::getVectorElementType(ShufMaskVT);
+        
+        // We generate a shuffle of InVec and ScVec, so the shuffle mask should
+        // be 0,1,2,3,4,5... with the appropriate element replaced with elt 0 of
+        // the RHS.
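+        // E.g. for a 4-element vector with InsertPos == 2, the mask would be
+        // <0, 1, 4, 3>, where index 4 selects element 0 of ScVec.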
+        SmallVector<SDOperand, 8> ShufOps;
+        for (unsigned i = 0; i != NumElts; ++i) {
+          if (i != InsertPos->getValue())
+            ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT));
+          else
+            ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT));
+        }
+        SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, ShufMaskVT,
+                                         &ShufOps[0], ShufOps.size());
+        
+        Result = DAG.getNode(ISD::VECTOR_SHUFFLE, Tmp1.getValueType(),
+                             Tmp1, ScVec, ShufMask);
+        Result = LegalizeOp(Result);
+        break;
+      }
+      
+      // If the target doesn't support this, we have to spill the input vector
+      // to a temporary stack slot, update the element, then reload it.  This
+      // is inefficient.  We could instead load the value into a vector
+      // register (either with a "move to register" or an "extload into
+      // register" instruction) and then permute it into place, if the index
+      // is a constant and is supported by the target.
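+      //
+      // Roughly, the expansion below is (in pseudo-DAG form, chains omitted):
+      //   store Vec -> [slot]
+      //   Ptr = slot + EltNo * sizeof(EltVT)
+      //   store Elt -> [Ptr]
+      //   Res = load [slot]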
+      MVT::ValueType VT    = Tmp1.getValueType();
+      MVT::ValueType EltVT = Tmp2.getValueType();
+      MVT::ValueType IdxVT = Tmp3.getValueType();
+      MVT::ValueType PtrVT = TLI.getPointerTy();
+      SDOperand StackPtr = CreateStackTemporary(VT);
+      // Store the vector.
+      SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Tmp1, StackPtr, NULL, 0);
+
+      // Truncate or zero extend offset to target pointer type.
+      unsigned CastOpc = (IdxVT > PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+      Tmp3 = DAG.getNode(CastOpc, PtrVT, Tmp3);
+      // Add the offset to the index.
+      unsigned EltSize = MVT::getSizeInBits(EltVT)/8;
+      Tmp3 = DAG.getNode(ISD::MUL, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
+      SDOperand StackPtr2 = DAG.getNode(ISD::ADD, IdxVT, Tmp3, StackPtr);
+      // Store the scalar value.
+      Ch = DAG.getStore(Ch, Tmp2, StackPtr2, NULL, 0);
+      // Load the updated vector.
+      Result = DAG.getLoad(VT, Ch, StackPtr, NULL, 0);
+      break;
+    }
+    }
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    if (!TLI.isTypeLegal(Node->getOperand(0).getValueType())) {
+      Result = LegalizeOp(ExpandSCALAR_TO_VECTOR(Node));
+      break;
+    }
+    
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // InVal
+    Result = DAG.UpdateNodeOperands(Result, Tmp1);
+    switch (TLI.getOperationAction(ISD::SCALAR_TO_VECTOR,
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand:
+      Result = LegalizeOp(ExpandSCALAR_TO_VECTOR(Node));
+      break;
+    }
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // Legalize the input vectors,
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // but not the shuffle mask.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+
+    // Allow targets to custom lower the SHUFFLEs they support.
+    switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE,Result.getValueType())) {
+    default: assert(0 && "Unknown operation action!");
+    case TargetLowering::Legal:
+      assert(isShuffleLegal(Result.getValueType(), Node->getOperand(2)) &&
+             "vector shuffle should not be created if not legal!");
+      break;
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand: {
+      MVT::ValueType VT = Node->getValueType(0);
+      MVT::ValueType EltVT = MVT::getVectorElementType(VT);
+      MVT::ValueType PtrVT = TLI.getPointerTy();
+      SDOperand Mask = Node->getOperand(2);
+      unsigned NumElems = Mask.getNumOperands();
+      SmallVector<SDOperand,8> Ops;
+      for (unsigned i = 0; i != NumElems; ++i) {
+        SDOperand Arg = Mask.getOperand(i);
+        if (Arg.getOpcode() == ISD::UNDEF) {
+          Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+        } else {
+          assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+          unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
+          if (Idx < NumElems)
+            Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp1,
+                                      DAG.getConstant(Idx, PtrVT)));
+          else
+            Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp2,
+                                      DAG.getConstant(Idx - NumElems, PtrVT)));
+        }
+      }
+      Result = DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+      break;
+    }
+    case TargetLowering::Promote: {
+      // Change base type to a different vector type.
+      MVT::ValueType OVT = Node->getValueType(0);
+      MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+
+      // Cast the two input vectors.
+      Tmp1 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp1);
+      Tmp2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp2);
+      
+      // Convert the shuffle mask to the right # elements.
+      Tmp3 = SDOperand(isShuffleLegal(OVT, Node->getOperand(2)), 0);
+      assert(Tmp3.Val && "Shuffle not legal?");
+      Result = DAG.getNode(ISD::VECTOR_SHUFFLE, NVT, Tmp1, Tmp2, Tmp3);
+      Result = DAG.getNode(ISD::BIT_CONVERT, OVT, Result);
+      break;
+    }
+    }
+    break;
+  
+  case ISD::EXTRACT_VECTOR_ELT:
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    Result = ExpandEXTRACT_VECTOR_ELT(Result);
+    break;
+
+  case ISD::EXTRACT_SUBVECTOR: 
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    Result = ExpandEXTRACT_SUBVECTOR(Result);
+    break;
+    
+  case ISD::CALLSEQ_START: {
+    SDNode *CallEnd = FindCallEndFromCallStart(Node);
+    
+    // Recursively legalize all of the inputs of the call end that do not lead
+    // to this call start.  This ensures that any libcalls that need to be
+    // inserted are inserted *before* the CALLSEQ_START.
+    {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+    for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+      LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).Val, Node,
+                                   NodesLeadingTo);
+    }
+
+    // Now that we legalized all of the inputs (which may have inserted
+    // libcalls) create the new CALLSEQ_START node.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+
+    // Merge in the last call to ensure that this call starts after the last
+    // call ended.
+    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+      Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+      Tmp1 = LegalizeOp(Tmp1);
+    }
+      
+    // Do not try to legalize the target-specific arguments (#1+).
+    if (Tmp1 != Node->getOperand(0)) {
+      SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+      Ops[0] = Tmp1;
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+    }
+    
+    // Remember that the CALLSEQ_START is legalized.
+    AddLegalizedOperand(Op.getValue(0), Result);
+    if (Node->getNumValues() == 2)    // If this has a flag result, remember it.
+      AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+    
+    // Now that the callseq_start and all of the non-call nodes above this call
+    // sequence have been legalized, legalize the call itself.  During this 
+    // process, no libcalls can/will be inserted, guaranteeing that no calls
+    // can overlap.
+    assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+    SDOperand InCallSEQ = LastCALLSEQ_END;
+    // Note that we are legalizing this call!
+    LastCALLSEQ_END = SDOperand(CallEnd, 0);
+    IsLegalizingCall = true;
+    
+    // Legalize the call, starting from the CALLSEQ_END.
+    LegalizeOp(LastCALLSEQ_END);
+    assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+    return Result;
+  }
+  case ISD::CALLSEQ_END:
+    // If the CALLSEQ_START node hasn't been legalized first, legalize it.  This
+    // causes this node to be legalized as well and ensures that libcalls are
+    // handled correctly.
+    if (LastCALLSEQ_END.Val != Node) {
+      LegalizeOp(SDOperand(FindCallStartFromCallEnd(Node), 0));
+      DenseMap<SDOperand, SDOperand>::iterator I = LegalizedNodes.find(Op);
+      assert(I != LegalizedNodes.end() &&
+             "Legalizing the call start should have legalized this node!");
+      return I->second;
+    }
+    
+    // Otherwise, the call start has been legalized and everything is going 
+    // according to plan.  Just legalize ourselves normally here.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Do not try to legalize the target-specific arguments (#1+), except for
+    // an optional flag input.
+    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+      if (Tmp1 != Node->getOperand(0)) {
+        SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+    } else {
+      Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+      if (Tmp1 != Node->getOperand(0) ||
+          Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+        SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Ops.back() = Tmp2;
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+    }
+    assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
+    // This finishes up call legalization.
+    IsLegalizingCall = false;
+    
+    // If the CALLSEQ_END node has a flag, remember that we legalized it.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    if (Node->getNumValues() == 2)
+      AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.ResNo);
+  case ISD::DYNAMIC_STACKALLOC: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the size.
+    Tmp3 = LegalizeOp(Node->getOperand(2));  // Legalize the alignment.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+
+    Tmp1 = Result.getValue(0);
+    Tmp2 = Result.getValue(1);
+    switch (TLI.getOperationAction(Node->getOpcode(),
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
+      unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+      assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+             " not tell us which reg is the stack pointer!");
+      SDOperand Chain = Tmp1.getOperand(0);
+      SDOperand Size  = Tmp2.getOperand(1);
+      SDOperand SP = DAG.getCopyFromReg(Chain, SPReg, Node->getValueType(0));
+      Tmp1 = DAG.getNode(ISD::SUB, Node->getValueType(0), SP, Size);    // Value
+      Tmp2 = DAG.getCopyToReg(SP.getValue(1), SPReg, Tmp1);      // Output chain
+      Tmp1 = LegalizeOp(Tmp1);
+      Tmp2 = LegalizeOp(Tmp2);
+      break;
+    }
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Tmp1, DAG);
+      if (Tmp3.Val) {
+        Tmp1 = LegalizeOp(Tmp3);
+        Tmp2 = LegalizeOp(Tmp3.getValue(1));
+      }
+      break;
+    case TargetLowering::Legal:
+      break;
+    }
+    // Since this op produces two values, make sure to remember that we
+    // legalized both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+    AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+    return Op.ResNo ? Tmp2 : Tmp1;
+  }
+  case ISD::INLINEASM: {
+    SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+    bool Changed = false;
+    // Legalize all of the operands of the inline asm, in case they are nodes
+    // that need to be expanded.  Note that we skip the asm string and all of
+    // the TargetConstant flags.
+    SDOperand Op = LegalizeOp(Ops[0]);
+    Changed = Op != Ops[0];
+    Ops[0] = Op;
+
+    bool HasInFlag = Ops.back().getValueType() == MVT::Flag;
+    for (unsigned i = 2, e = Ops.size()-HasInFlag; i < e; ) {
+      unsigned NumVals = cast<ConstantSDNode>(Ops[i])->getValue() >> 3;
+      for (++i; NumVals; ++i, --NumVals) {
+        SDOperand Op = LegalizeOp(Ops[i]);
+        if (Op != Ops[i]) {
+          Changed = true;
+          Ops[i] = Op;
+        }
+      }
+    }
+
+    if (HasInFlag) {
+      Op = LegalizeOp(Ops.back());
+      Changed |= Op != Ops.back();
+      Ops.back() = Op;
+    }
+    
+    if (Changed)
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      
+    // INLINE asm returns a chain and flag, make sure to add both to the map.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.ResNo);
+  }
+  case ISD::BR:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+    
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::BRIND:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+    
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+    default: assert(0 && "Indirect target must be legal type (pointer)!");
+    case Legal:
+      Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the indirect target.
+      break;
+    }
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    break;
+  case ISD::BR_JT:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the jumptable node.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+
+    switch (TLI.getOperationAction(ISD::BR_JT, MVT::Other)) {  
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Expand: {
+      SDOperand Chain = Result.getOperand(0);
+      SDOperand Table = Result.getOperand(1);
+      SDOperand Index = Result.getOperand(2);
+
+      MVT::ValueType PTy = TLI.getPointerTy();
+      MachineFunction &MF = DAG.getMachineFunction();
+      unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize();
+      Index= DAG.getNode(ISD::MUL, PTy, Index, DAG.getConstant(EntrySize, PTy));
+      SDOperand Addr = DAG.getNode(ISD::ADD, PTy, Index, Table);
+      
+      SDOperand LD;
+      switch (EntrySize) {
+      default: assert(0 && "Size of jump table not supported yet."); break;
+      case 4: LD = DAG.getLoad(MVT::i32, Chain, Addr, NULL, 0); break;
+      case 8: LD = DAG.getLoad(MVT::i64, Chain, Addr, NULL, 0); break;
+      }
+
+      if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+        // For PIC, the sequence is:
+        // BRIND(load(Jumptable + index) + RelocBase)
+        // RelocBase is the JumpTable on PPC and X86, GOT on Alpha
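+        // E.g. (illustrative) with 32-bit table entries on a 64-bit target,
+        // the destination is computed as:
+        //   Addr = sext(load(Table + Index*EntrySize)) + RelocBase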
+        SDOperand Reloc;
+        if (TLI.usesGlobalOffsetTable())
+          Reloc = DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, PTy);
+        else
+          Reloc = Table;
+        Addr = (PTy != MVT::i32) ? DAG.getNode(ISD::SIGN_EXTEND, PTy, LD) : LD;
+        Addr = DAG.getNode(ISD::ADD, PTy, Addr, Reloc);
+        Result = DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), Addr);
+      } else {
+        Result = DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), LD);
+      }
+    }
+    }
+    break;
+  case ISD::BRCOND:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+    case Expand: assert(0 && "It's impossible to expand bools");
+    case Legal:
+      Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the condition.
+      break;
+    case Promote:
+      Tmp2 = PromoteOp(Node->getOperand(1));  // Promote the condition.
+      
+      // The top bits of the promoted condition are not necessarily zero;
+      // ensure that the value is properly zero extended.
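+      // E.g. an i1 promoted to i32 may carry garbage in bits 31..1; the
+      // zero-extend-in-reg below clears them so only bit 0 is tested.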
+      if (!DAG.MaskedValueIsZero(Tmp2, 
+                                 MVT::getIntVTBitMask(Tmp2.getValueType())^1))
+        Tmp2 = DAG.getZeroExtendInReg(Tmp2, MVT::i1);
+      break;
+    }
+
+    // Basic block destination (Op#2) is always legal.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+      
+    switch (TLI.getOperationAction(ISD::BRCOND, MVT::Other)) {  
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Expand:
+      // Expand brcond's setcc into its constituent parts and create a BR_CC
+      // Node.
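+      // E.g. (chain omitted):
+      //   (brcond (setcc LHS, RHS, cc), dest) -> (br_cc cc, LHS, RHS, dest)
+      // and any other boolean condition C becomes (br_cc setne, C, 0, dest).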
+      if (Tmp2.getOpcode() == ISD::SETCC) {
+        Result = DAG.getNode(ISD::BR_CC, MVT::Other, Tmp1, Tmp2.getOperand(2),
+                             Tmp2.getOperand(0), Tmp2.getOperand(1),
+                             Node->getOperand(2));
+      } else {
+        Result = DAG.getNode(ISD::BR_CC, MVT::Other, Tmp1, 
+                             DAG.getCondCode(ISD::SETNE), Tmp2,
+                             DAG.getConstant(0, Tmp2.getValueType()),
+                             Node->getOperand(2));
+      }
+      break;
+    }
+    break;
+  case ISD::BR_CC:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    Tmp2 = Node->getOperand(2);              // LHS 
+    Tmp3 = Node->getOperand(3);              // RHS
+    Tmp4 = Node->getOperand(1);              // CC
+
+    LegalizeSetCCOperands(Tmp2, Tmp3, Tmp4);
+    LastCALLSEQ_END = DAG.getEntryNode();
+
+    // If we didn't get both a LHS and RHS back from LegalizeSetCCOperands,
+    // the LHS is a legal SETCC itself.  In this case, compare the SETCC
+    // result against zero to decide whether to take the branch.
+    if (Tmp3.Val == 0) {
+      Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+      Tmp4 = DAG.getCondCode(ISD::SETNE);
+    }
+    
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp4, Tmp2, Tmp3, 
+                                    Node->getOperand(4));
+      
+    switch (TLI.getOperationAction(ISD::BR_CC, Tmp3.getValueType())) {
+    default: assert(0 && "Unexpected action for BR_CC!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp4 = TLI.LowerOperation(Result, DAG);
+      if (Tmp4.Val) Result = Tmp4;
+      break;
+    }
+    break;
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    Tmp1 = LegalizeOp(LD->getChain());   // Legalize the chain.
+    Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+    ISD::LoadExtType ExtType = LD->getExtensionType();
+    if (ExtType == ISD::NON_EXTLOAD) {
+      MVT::ValueType VT = Node->getValueType(0);
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+      Tmp3 = Result.getValue(0);
+      Tmp4 = Result.getValue(1);
+    
+      switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal: break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+        if (Tmp1.Val) {
+          Tmp3 = LegalizeOp(Tmp1);
+          Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        }
+        break;
+      case TargetLowering::Promote: {
+        // Only promote a load of vector type to another.
+        assert(MVT::isVector(VT) && "Cannot promote this load!");
+        // Change base type to a different vector type.
+        MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+        Tmp1 = DAG.getLoad(NVT, Tmp1, Tmp2, LD->getSrcValue(),
+                           LD->getSrcValueOffset(),
+                           LD->isVolatile(), LD->getAlignment());
+        Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, VT, Tmp1));
+        Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        break;
+      }
+      }
+      // Since loads produce two values, make sure to remember that we 
+      // legalized both of them.
+      AddLegalizedOperand(SDOperand(Node, 0), Tmp3);
+      AddLegalizedOperand(SDOperand(Node, 1), Tmp4);
+      return Op.ResNo ? Tmp4 : Tmp3;
+    } else {
+      MVT::ValueType SrcVT = LD->getLoadedVT();
+      switch (TLI.getLoadXAction(ExtType, SrcVT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Promote:
+        assert(SrcVT == MVT::i1 &&
+               "Can only promote extending LOAD from i1 -> i8!");
+        Result = DAG.getExtLoad(ExtType, Node->getValueType(0), Tmp1, Tmp2,
+                                LD->getSrcValue(), LD->getSrcValueOffset(),
+                                MVT::i8, LD->isVolatile(), LD->getAlignment());
+        Tmp1 = Result.getValue(0);
+        Tmp2 = Result.getValue(1);
+        break;
+      case TargetLowering::Custom:
+        isCustom = true;
+        // FALLTHROUGH
+      case TargetLowering::Legal:
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+        Tmp1 = Result.getValue(0);
+        Tmp2 = Result.getValue(1);
+      
+        if (isCustom) {
+          Tmp3 = TLI.LowerOperation(Result, DAG);
+          if (Tmp3.Val) {
+            Tmp1 = LegalizeOp(Tmp3);
+            Tmp2 = LegalizeOp(Tmp3.getValue(1));
+          }
+        }
+        break;
+      case TargetLowering::Expand:
+        // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+        if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) {
+          SDOperand Load = DAG.getLoad(SrcVT, Tmp1, Tmp2, LD->getSrcValue(),
+                                       LD->getSrcValueOffset(),
+                                       LD->isVolatile(), LD->getAlignment());
+          Result = DAG.getNode(ISD::FP_EXTEND, Node->getValueType(0), Load);
+          Tmp1 = LegalizeOp(Result);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Load.getValue(1));
+          break;
+        }
+        assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!");
+        // Turn the unsupported load into an EXTLOAD followed by an explicit
+        // zero/sign extend inreg.
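+        // E.g. an unsupported sextload from i8 becomes (chain omitted):
+        //   (sign_extend_inreg (extload i8), i8)
+        // and an unsupported zextload uses an explicit zero-extend-in-reg.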
+        Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0),
+                                Tmp1, Tmp2, LD->getSrcValue(),
+                                LD->getSrcValueOffset(), SrcVT,
+                                LD->isVolatile(), LD->getAlignment());
+        SDOperand ValRes;
+        if (ExtType == ISD::SEXTLOAD)
+          ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+                               Result, DAG.getValueType(SrcVT));
+        else
+          ValRes = DAG.getZeroExtendInReg(Result, SrcVT);
+        Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+        Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes.
+        break;
+      }
+      // Since loads produce two values, make sure to remember that we legalized
+      // both of them.
+      AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+      AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+      return Op.ResNo ? Tmp2 : Tmp1;
+    }
+  }
+  case ISD::EXTRACT_ELEMENT: {
+    MVT::ValueType OpTy = Node->getOperand(0).getValueType();
+    switch (getTypeAction(OpTy)) {
+    default: assert(0 && "EXTRACT_ELEMENT action for type unimplemented!");
+    case Legal:
+      if (cast<ConstantSDNode>(Node->getOperand(1))->getValue()) {
+        // 1 -> Hi
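+        // E.g. for i64: Hi = trunc(x >> 32).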
+        Result = DAG.getNode(ISD::SRL, OpTy, Node->getOperand(0),
+                             DAG.getConstant(MVT::getSizeInBits(OpTy)/2, 
+                                             TLI.getShiftAmountTy()));
+        Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Result);
+      } else {
+        // 0 -> Lo
+        Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), 
+                             Node->getOperand(0));
+      }
+      break;
+    case Expand:
+      // Get both the low and high parts.
+      ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+      if (cast<ConstantSDNode>(Node->getOperand(1))->getValue())
+        Result = Tmp2;  // 1 -> Hi
+      else
+        Result = Tmp1;  // 0 -> Lo
+      break;
+    }
+    break;
+  }
+
+  case ISD::CopyToReg:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+
+    assert(isTypeLegal(Node->getOperand(2).getValueType()) &&
+           "Register type must be legal!");
+    // Legalize the incoming value (must be a legal type).
+    Tmp2 = LegalizeOp(Node->getOperand(2));
+    if (Node->getNumValues() == 1) {
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1), Tmp2);
+    } else {
+      assert(Node->getNumValues() == 2 && "Unknown CopyToReg");
+      if (Node->getNumOperands() == 4) {
+        Tmp3 = LegalizeOp(Node->getOperand(3));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1), Tmp2,
+                                        Tmp3);
+      } else {
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1),Tmp2);
+      }
+      
+      // Since this produces two values, make sure to remember that we legalized
+      // both of them.
+      AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+      AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+      return Result;
+    }
+    break;
+
+  case ISD::RET:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+
+    // Ensure that libcalls are emitted before a return.
+    Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+    Tmp1 = LegalizeOp(Tmp1);
+    LastCALLSEQ_END = DAG.getEntryNode();
+      
+    switch (Node->getNumOperands()) {
+    case 3:  // ret val
+      Tmp2 = Node->getOperand(1);
+      Tmp3 = Node->getOperand(2);  // Signedness
+      switch (getTypeAction(Tmp2.getValueType())) {
+      case Legal:
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, LegalizeOp(Tmp2), Tmp3);
+        break;
+      case Expand:
+        if (!MVT::isVector(Tmp2.getValueType())) {
+          SDOperand Lo, Hi;
+          ExpandOp(Tmp2, Lo, Hi);
+
+          // Big endian systems want the hi reg first.
+          if (!TLI.isLittleEndian())
+            std::swap(Lo, Hi);
+          
+          if (Hi.Val)
+            Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3, Hi,Tmp3);
+          else
+            Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3);
+          Result = LegalizeOp(Result);
+        } else {
+          SDNode *InVal = Tmp2.Val;
+          unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0));
+          MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0));
+          
+          // Figure out if there is a simple type corresponding to this Vector
+          // type.  If so, convert to the vector type.
+          MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems);
+          if (TLI.isTypeLegal(TVT)) {
+            // Turn this into a return of the vector type.
+            Tmp2 = LegalizeOp(Tmp2);
+            Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+          } else if (NumElems == 1) {
+            // Turn this into a return of the scalar type.
+            Tmp2 = ScalarizeVectorOp(Tmp2);
+            Tmp2 = LegalizeOp(Tmp2);
+            Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+            
+            // FIXME: Returns of gcc generic vectors smaller than a legal type
+            // should be returned in integer registers!
+            
+            // The scalarized value type may not be legal, e.g. it might require
+            // promotion or expansion.  Relegalize the return.
+            Result = LegalizeOp(Result);
+          } else {
+            // FIXME: Returns of gcc generic vectors larger than a legal vector
+            // type should be returned by reference!
+            SDOperand Lo, Hi;
+            SplitVectorOp(Tmp2, Lo, Hi);
+            Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3, Hi,Tmp3);
+            Result = LegalizeOp(Result);
+          }
+        }
+        break;
+      case Promote:
+        Tmp2 = PromoteOp(Node->getOperand(1));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+        Result = LegalizeOp(Result);
+        break;
+      }
+      break;
+    case 1:  // ret void
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    default: { // ret <values>
+      SmallVector<SDOperand, 8> NewValues;
+      NewValues.push_back(Tmp1);
+      for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2)
+        switch (getTypeAction(Node->getOperand(i).getValueType())) {
+        case Legal:
+          NewValues.push_back(LegalizeOp(Node->getOperand(i)));
+          NewValues.push_back(Node->getOperand(i+1));
+          break;
+        case Expand: {
+          SDOperand Lo, Hi;
+          assert(!MVT::isExtendedVT(Node->getOperand(i).getValueType()) &&
+                 "FIXME: TODO: implement returning non-legal vector types!");
+          ExpandOp(Node->getOperand(i), Lo, Hi);
+          NewValues.push_back(Lo);
+          NewValues.push_back(Node->getOperand(i+1));
+          if (Hi.Val) {
+            NewValues.push_back(Hi);
+            NewValues.push_back(Node->getOperand(i+1));
+          }
+          break;
+        }
+        case Promote:
+          assert(0 && "Can't promote multiple return value yet!");
+        }
+          
+      if (NewValues.size() == Node->getNumOperands())
+        Result = DAG.UpdateNodeOperands(Result, &NewValues[0],NewValues.size());
+      else
+        Result = DAG.getNode(ISD::RET, MVT::Other,
+                             &NewValues[0], NewValues.size());
+      break;
+    }
+    }
+
+    if (Result.getOpcode() == ISD::RET) {
+      switch (TLI.getOperationAction(Result.getOpcode(), MVT::Other)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal: break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+        break;
+      }
+    }
+    break;
+  case ISD::STORE: {
+    StoreSDNode *ST = cast<StoreSDNode>(Node);
+    Tmp1 = LegalizeOp(ST->getChain());    // Legalize the chain.
+    Tmp2 = LegalizeOp(ST->getBasePtr());  // Legalize the pointer.
+    int SVOffset = ST->getSrcValueOffset();
+    unsigned Alignment = ST->getAlignment();
+    bool isVolatile = ST->isVolatile();
+
+    if (!ST->isTruncatingStore()) {
+      // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+      // FIXME: We shouldn't do this for TargetConstantFP's.
+      // FIXME: move this to the DAG Combiner!  Note that we can't regress due
+      // to phase ordering between legalized code and the dag combiner.  This
+      // probably means that we need to integrate dag combiner and legalizer
+      // together.
+      if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+        if (CFP->getValueType(0) == MVT::f32) {
+          Tmp3 = DAG.getConstant(FloatToBits(CFP->getValue()), MVT::i32);
+        } else {
+          assert(CFP->getValueType(0) == MVT::f64 && "Unknown FP type!");
+          Tmp3 = DAG.getConstant(DoubleToBits(CFP->getValue()), MVT::i64);
+        }
+        Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                              SVOffset, isVolatile, Alignment);
+        break;
+      }
+      
+      switch (getTypeAction(ST->getStoredVT())) {
+      case Legal: {
+        Tmp3 = LegalizeOp(ST->getValue());
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, 
+                                        ST->getOffset());
+
+        MVT::ValueType VT = Tmp3.getValueType();
+        switch (TLI.getOperationAction(ISD::STORE, VT)) {
+        default: assert(0 && "This action is not supported yet!");
+        case TargetLowering::Legal:  break;
+        case TargetLowering::Custom:
+          Tmp1 = TLI.LowerOperation(Result, DAG);
+          if (Tmp1.Val) Result = Tmp1;
+          break;
+        case TargetLowering::Promote:
+          assert(MVT::isVector(VT) && "Unknown legal promote case!");
+          Tmp3 = DAG.getNode(ISD::BIT_CONVERT, 
+                             TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+          Result = DAG.getStore(Tmp1, Tmp3, Tmp2,
+                                ST->getSrcValue(), SVOffset, isVolatile,
+                                Alignment);
+          break;
+        }
+        break;
+      }
+      case Promote:
+        // Truncate the value and store the result.
+        Tmp3 = PromoteOp(ST->getValue());
+        Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                                   SVOffset, ST->getStoredVT(),
+                                   isVolatile, Alignment);
+        break;
+
+      case Expand:
+        unsigned IncrementSize = 0;
+        SDOperand Lo, Hi;
+      
+        // If this is a vector type, then we have to calculate the increment as
+        // the product of the element size in bytes and the number of elements
+        // in the high half of the vector.
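+        // E.g. splitting a v4i32 store: the high half holds 2 elements of
+        // 4 bytes each, so IncrementSize = 2 * 4 = 8 bytes.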
+        if (MVT::isVector(ST->getValue().getValueType())) {
+          SDNode *InVal = ST->getValue().Val;
+          unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0));
+          MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0));
+
+          // Figure out if there is a simple type corresponding to this Vector
+          // type.  If so, convert to the vector type.
+          MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems);
+          if (TLI.isTypeLegal(TVT)) {
+            // Turn this into a normal store of the vector type.
+            Tmp3 = LegalizeOp(Node->getOperand(1));
+            Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                                  SVOffset, isVolatile, Alignment);
+            Result = LegalizeOp(Result);
+            break;
+          } else if (NumElems == 1) {
+            // Turn this into a normal store of the scalar type.
+            Tmp3 = ScalarizeVectorOp(Node->getOperand(1));
+            Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                                  SVOffset, isVolatile, Alignment);
+            // The scalarized value type may not be legal, e.g. it might require
+            // promotion or expansion.  Relegalize the scalar store.
+            Result = LegalizeOp(Result);
+            break;
+          } else {
+            SplitVectorOp(Node->getOperand(1), Lo, Hi);
+            IncrementSize = NumElems/2 * MVT::getSizeInBits(EVT)/8;
+          }
+        } else {
+          ExpandOp(Node->getOperand(1), Lo, Hi);
+          IncrementSize = Hi.Val ? MVT::getSizeInBits(Hi.getValueType())/8 : 0;
+
+          if (!TLI.isLittleEndian())
+            std::swap(Lo, Hi);
+        }
+
+        Lo = DAG.getStore(Tmp1, Lo, Tmp2, ST->getSrcValue(),
+                          SVOffset, isVolatile, Alignment);
+
+        if (Hi.Val == NULL) {
+          // Must be int <-> float one-to-one expansion.
+          Result = Lo;
+          break;
+        }
+
+        Tmp2 = DAG.getNode(ISD::ADD, Tmp2.getValueType(), Tmp2,
+                           getIntPtrConstant(IncrementSize));
+        assert(isTypeLegal(Tmp2.getValueType()) &&
+               "Pointers must be legal!");
+        SVOffset += IncrementSize;
+        if (Alignment > IncrementSize)
+          Alignment = IncrementSize;
+        Hi = DAG.getStore(Tmp1, Hi, Tmp2, ST->getSrcValue(),
+                          SVOffset, isVolatile, Alignment);
+        Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo, Hi);
+        break;
+      }
+    } else {
+      // Truncating store
+      assert(isTypeLegal(ST->getValue().getValueType()) &&
+             "Cannot handle illegal TRUNCSTORE yet!");
+      Tmp3 = LegalizeOp(ST->getValue());
+    
+      // The only promote case we handle is TRUNCSTORE:i1 X into
+      //   -> TRUNCSTORE:i8 (and X, 1)
+      if (ST->getStoredVT() == MVT::i1 &&
+          TLI.getStoreXAction(MVT::i1) == TargetLowering::Promote) {
+        // Promote the bool to a mask then store.
+        Tmp3 = DAG.getNode(ISD::AND, Tmp3.getValueType(), Tmp3,
+                           DAG.getConstant(1, Tmp3.getValueType()));
+        Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                                   SVOffset, MVT::i8,
+                                   isVolatile, Alignment);
+      } else if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+                 Tmp2 != ST->getBasePtr()) {
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+                                        ST->getOffset());
+      }
+
+      MVT::ValueType StVT = cast<StoreSDNode>(Result.Val)->getStoredVT();
+      switch (TLI.getStoreXAction(StVT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal: break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+        break;
+      }
+    }
+    break;
+  }
+  case ISD::PCMARKER:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::STACKSAVE:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1);
+    Tmp1 = Result.getValue(0);
+    Tmp2 = Result.getValue(1);
+    
+    switch (TLI.getOperationAction(ISD::STACKSAVE, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.Val) {
+        Tmp1 = LegalizeOp(Tmp3);
+        Tmp2 = LegalizeOp(Tmp3.getValue(1));
+      }
+      break;
+    case TargetLowering::Expand:
+      // Expand to CopyFromReg if the target set 
+      // StackPointerRegisterToSaveRestore.
+      if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+        Tmp1 = DAG.getCopyFromReg(Result.getOperand(0), SP,
+                                  Node->getValueType(0));
+        Tmp2 = Tmp1.getValue(1);
+      } else {
+        Tmp1 = DAG.getNode(ISD::UNDEF, Node->getValueType(0));
+        Tmp2 = Node->getOperand(0);
+      }
+      break;
+    }
+
+    // Since stacksave produces two values, make sure to remember that we
+    // legalized both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+    AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+    return Op.ResNo ? Tmp2 : Tmp1;
+
+  case ISD::STACKRESTORE:
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the pointer.
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+      
+    switch (TLI.getOperationAction(ISD::STACKRESTORE, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Expand:
+      // Expand to CopyToReg if the target set 
+      // StackPointerRegisterToSaveRestore.
+      if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+        Result = DAG.getCopyToReg(Tmp1, SP, Tmp2);
+      } else {
+        Result = Tmp1;
+      }
+      break;
+    }
+    break;
+
+  case ISD::READCYCLECOUNTER:
+    Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain
+    Result = DAG.UpdateNodeOperands(Result, Tmp1);
+    switch (TLI.getOperationAction(ISD::READCYCLECOUNTER,
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal:
+      Tmp1 = Result.getValue(0);
+      Tmp2 = Result.getValue(1);
+      break;
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Result, DAG);
+      Tmp1 = LegalizeOp(Result.getValue(0));
+      Tmp2 = LegalizeOp(Result.getValue(1));
+      break;
+    }
+
+    // Since rdcc produces two values, make sure to remember that we legalized
+    // both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+    AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+    return Result;
+
+  case ISD::SELECT:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Expand: assert(0 && "It's impossible to expand bools");
+    case Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the condition.
+      break;
+    case Promote:
+      Tmp1 = PromoteOp(Node->getOperand(0));  // Promote the condition.
+      // Make sure the condition is either zero or one.
+      if (!DAG.MaskedValueIsZero(Tmp1,
+                                 MVT::getIntVTBitMask(Tmp1.getValueType())^1))
+        Tmp1 = DAG.getZeroExtendInReg(Tmp1, MVT::i1);
+      break;
+    }
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // TrueVal
+    Tmp3 = LegalizeOp(Node->getOperand(2));   // FalseVal
+
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+      
+    switch (TLI.getOperationAction(ISD::SELECT, Tmp2.getValueType())) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom: {
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    }
+    case TargetLowering::Expand:
+      if (Tmp1.getOpcode() == ISD::SETCC) {
+        Result = DAG.getSelectCC(Tmp1.getOperand(0), Tmp1.getOperand(1), 
+                              Tmp2, Tmp3,
+                              cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+      } else {
+        Result = DAG.getSelectCC(Tmp1, 
+                                 DAG.getConstant(0, Tmp1.getValueType()),
+                                 Tmp2, Tmp3, ISD::SETNE);
+      }
+      break;
+    case TargetLowering::Promote: {
+      MVT::ValueType NVT =
+        TLI.getTypeToPromoteTo(ISD::SELECT, Tmp2.getValueType());
+      unsigned ExtOp, TruncOp;
+      if (MVT::isVector(Tmp2.getValueType())) {
+        ExtOp   = ISD::BIT_CONVERT;
+        TruncOp = ISD::BIT_CONVERT;
+      } else if (MVT::isInteger(Tmp2.getValueType())) {
+        ExtOp   = ISD::ANY_EXTEND;
+        TruncOp = ISD::TRUNCATE;
+      } else {
+        ExtOp   = ISD::FP_EXTEND;
+        TruncOp = ISD::FP_ROUND;
+      }
+      // Promote each of the values to the new type.
+      Tmp2 = DAG.getNode(ExtOp, NVT, Tmp2);
+      Tmp3 = DAG.getNode(ExtOp, NVT, Tmp3);
+      // Perform the operation at the larger type, then round back down.
+      Result = DAG.getNode(ISD::SELECT, NVT, Tmp1, Tmp2,Tmp3);
+      Result = DAG.getNode(TruncOp, Node->getValueType(0), Result);
+      break;
+    }
+    }
+    break;
+  case ISD::SELECT_CC: {
+    Tmp1 = Node->getOperand(0);               // LHS
+    Tmp2 = Node->getOperand(1);               // RHS
+    Tmp3 = LegalizeOp(Node->getOperand(2));   // True
+    Tmp4 = LegalizeOp(Node->getOperand(3));   // False
+    SDOperand CC = Node->getOperand(4);
+    
+    LegalizeSetCCOperands(Tmp1, Tmp2, CC);
+    
+    // If we didn't get both a LHS and RHS back from LegalizeSetCCOperands,
+    // the LHS is a legal SETCC itself.  In this case, we need to compare
+    // the result against zero to select between true and false values.
+    if (Tmp2.Val == 0) {
+      Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+      CC = DAG.getCondCode(ISD::SETNE);
+    }
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, CC);
+
+    // Everything is legal, see if we should expand this op or something.
+    switch (TLI.getOperationAction(ISD::SELECT_CC, Tmp3.getValueType())) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    }
+    break;
+  }
+  case ISD::SETCC:
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    Tmp3 = Node->getOperand(2);
+    LegalizeSetCCOperands(Tmp1, Tmp2, Tmp3);
+    
+    // If we had to expand the SetCC operands into a SELECT node, then it may
+    // not always be possible to return a true LHS & RHS.  In this case, just
+    // return the value we legalized, which is returned in the LHS (Tmp1).
+    if (Tmp2.Val == 0) {
+      Result = Tmp1;
+      break;
+    }
+
+    switch (TLI.getOperationAction(ISD::SETCC, Tmp1.getValueType())) {
+    default: assert(0 && "Cannot handle this action for SETCC yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH.
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+      if (isCustom) {
+        Tmp4 = TLI.LowerOperation(Result, DAG);
+        if (Tmp4.Val) Result = Tmp4;
+      }
+      break;
+    case TargetLowering::Promote: {
+      // First step: figure out the appropriate operation to use.  SETCC may
+      // not be supported for all otherwise-legal data types; mostly this
+      // affects FP types.
+      MVT::ValueType NewInTy = Node->getOperand(0).getValueType();
+      MVT::ValueType OldVT = NewInTy;
+      OldVT = OldVT;  // Self-assignment silences an unused-variable warning
+                      // when asserts are compiled out.
+
+      // Scan for the appropriate larger type to use.
+      while (1) {
+        NewInTy = (MVT::ValueType)(NewInTy+1);
+
+        assert(MVT::isInteger(NewInTy) == MVT::isInteger(OldVT) &&
+               "Fell off of the edge of the integer world");
+        assert(MVT::isFloatingPoint(NewInTy) == MVT::isFloatingPoint(OldVT) &&
+               "Fell off of the edge of the floating point world");
+          
+        // If the target supports SETCC of this type, use it.
+        if (TLI.isOperationLegal(ISD::SETCC, NewInTy))
+          break;
+      }
+      if (MVT::isInteger(NewInTy))
+        assert(0 && "Cannot promote Legal Integer SETCC yet");
+      else {
+        Tmp1 = DAG.getNode(ISD::FP_EXTEND, NewInTy, Tmp1);
+        Tmp2 = DAG.getNode(ISD::FP_EXTEND, NewInTy, Tmp2);
+      }
+      Tmp1 = LegalizeOp(Tmp1);
+      Tmp2 = LegalizeOp(Tmp2);
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+      Result = LegalizeOp(Result);
+      break;
+    }
+    case TargetLowering::Expand:
+      // Expand a setcc node into a select_cc of the same condition, lhs, and
+      // rhs that selects between const 1 (true) and const 0 (false).
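+      // E.g. (setcc LHS, RHS, cc) -> (select_cc LHS, RHS, 1, 0, cc).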
+      MVT::ValueType VT = Node->getValueType(0);
+      Result = DAG.getNode(ISD::SELECT_CC, VT, Tmp1, Tmp2, 
+                           DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                           Tmp3);
+      break;
+    }
+    break;
+  case ISD::MEMSET:
+  case ISD::MEMCPY:
+  case ISD::MEMMOVE: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));      // Chain
+    Tmp2 = LegalizeOp(Node->getOperand(1));      // Pointer
+
+    if (Node->getOpcode() == ISD::MEMSET) {      // memset = ubyte
+      switch (getTypeAction(Node->getOperand(2).getValueType())) {
+      case Expand: assert(0 && "Cannot expand a byte!");
+      case Legal:
+        Tmp3 = LegalizeOp(Node->getOperand(2));
+        break;
+      case Promote:
+        Tmp3 = PromoteOp(Node->getOperand(2));
+        break;
+      }
+    } else {
+      Tmp3 = LegalizeOp(Node->getOperand(2));    // memcpy/move = pointer,
+    }
+
+    SDOperand Tmp4;
+    switch (getTypeAction(Node->getOperand(3).getValueType())) {
+    case Expand: {
+      // The length needs to be expanded; just take the lo-part of the length.
+      SDOperand HiPart;
+      ExpandOp(Node->getOperand(3), Tmp4, HiPart);
+      break;
+    }
+    case Legal:
+      Tmp4 = LegalizeOp(Node->getOperand(3));
+      break;
+    case Promote:
+      Tmp4 = PromoteOp(Node->getOperand(3));
+      break;
+    }
+
+    SDOperand Tmp5;
+    switch (getTypeAction(Node->getOperand(4).getValueType())) {  // uint
+    case Expand: assert(0 && "Cannot expand this yet!");
+    case Legal:
+      Tmp5 = LegalizeOp(Node->getOperand(4));
+      break;
+    case Promote:
+      Tmp5 = PromoteOp(Node->getOperand(4));
+      break;
+    }
+
+    switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
+    default: assert(0 && "This action not implemented for this operation!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, Tmp5);
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand: {
+      // Otherwise, the target does not support this operation.  Lower the
+      // operation to an explicit libcall as appropriate.
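+      // Sketch: ISD::MEMSET(Chain, Dst, Val, Len, Align) becomes a C call to
+      // memset(Dst, (int)Val, Len); MEMCPY and MEMMOVE map analogously.  The
+      // align operand is dropped on this path.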
+      MVT::ValueType IntPtr = TLI.getPointerTy();
+      const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+      TargetLowering::ArgListTy Args;
+      TargetLowering::ArgListEntry Entry;
+
+      const char *FnName = 0;
+      if (Node->getOpcode() == ISD::MEMSET) {
+        Entry.Node = Tmp2; Entry.Ty = IntPtrTy;
+        Args.push_back(Entry);
+        // Extend the (previously legalized) ubyte argument to an i32 value for
+        // the call, truncating instead if it was promoted past i32.
+        if (Tmp3.getValueType() > MVT::i32)
+          Tmp3 = DAG.getNode(ISD::TRUNCATE, MVT::i32, Tmp3);
+        else
+          Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Tmp3);
+        Entry.Node = Tmp3; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+        Args.push_back(Entry);
+        Entry.Node = Tmp4; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+        Args.push_back(Entry);
+
+        FnName = "memset";
+      } else if (Node->getOpcode() == ISD::MEMCPY ||
+                 Node->getOpcode() == ISD::MEMMOVE) {
+        Entry.Ty = IntPtrTy;
+        Entry.Node = Tmp2; Args.push_back(Entry);
+        Entry.Node = Tmp3; Args.push_back(Entry);
+        Entry.Node = Tmp4; Args.push_back(Entry);
+        FnName = Node->getOpcode() == ISD::MEMMOVE ? "memmove" : "memcpy";
+      } else {
+        assert(0 && "Unknown op!");
+      }
+
+      std::pair<SDOperand,SDOperand> CallResult =
+        TLI.LowerCallTo(Tmp1, Type::VoidTy, false, false, CallingConv::C, false,
+                        DAG.getExternalSymbol(FnName, IntPtr), Args, DAG);
+      Result = CallResult.second;
+      break;
+    }
+    }
+    break;
+  }
+
+  case ISD::SHL_PARTS:
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS: {
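+    // The *_PARTS nodes shift a multi-register integer given and returned
+    // as (Lo, Hi) halves, so they produce multiple result values that must
+    // each be remembered as legalized.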
+    SmallVector<SDOperand, 8> Ops;
+    bool Changed = false;
+    for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+      Ops.push_back(LegalizeOp(Node->getOperand(i)));
+      Changed |= Ops.back() != Node->getOperand(i);
+    }
+    if (Changed)
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+
+    switch (TLI.getOperationAction(Node->getOpcode(),
+                                   Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) {
+        SDOperand Tmp2, RetVal(0, 0);
+        for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+          Tmp2 = LegalizeOp(Tmp1.getValue(i));
+          AddLegalizedOperand(SDOperand(Node, i), Tmp2);
+          if (i == Op.ResNo)
+            RetVal = Tmp2;
+        }
+        assert(RetVal.Val && "Illegal result number");
+        return RetVal;
+      }
+      break;
+    }
+
+    // Since these produce multiple values, make sure to remember that we
+    // legalized all of them.
+    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+      AddLegalizedOperand(SDOperand(Node, i), Result.getValue(i));
+    return Result.getValue(Op.ResNo);
+  }
+
+    // Binary operators
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::MULHS:
+  case ISD::MULHU:
+  case ISD::UDIV:
+  case ISD::SDIV:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // LHS
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+    case Expand: assert(0 && "Not possible");
+    case Legal:
+      Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the RHS.
+      break;
+    case Promote:
+      Tmp2 = PromoteOp(Node->getOperand(1));  // Promote the RHS.
+      break;
+    }
+    
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+      
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default: assert(0 && "BinOp legalize operation not supported");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Expand: {
+      if (Node->getValueType(0) == MVT::i32) {
+        switch (Node->getOpcode()) {
+        default:  assert(0 && "Do not know how to expand this integer BinOp!");
+        case ISD::UDIV:
+        case ISD::SDIV: {
+          RTLIB::Libcall LC = Node->getOpcode() == ISD::UDIV
+            ? RTLIB::UDIV_I32 : RTLIB::SDIV_I32;
+          SDOperand Dummy;
+          bool isSigned = Node->getOpcode() == ISD::SDIV;
+          Result = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Dummy);
+          break;
+        }
+        }
+        break;
+      }
+
+      assert(MVT::isVector(Node->getValueType(0)) &&
+             "Cannot expand this binary operator!");
+      // Expand the operation into a bunch of nasty scalar code.
+      SmallVector<SDOperand, 8> Ops;
+      MVT::ValueType EltVT = MVT::getVectorElementType(Node->getValueType(0));
+      MVT::ValueType PtrVT = TLI.getPointerTy();
+      for (unsigned i = 0, e = MVT::getVectorNumElements(Node->getValueType(0));
+           i != e; ++i) {
+        SDOperand Idx = DAG.getConstant(i, PtrVT);
+        SDOperand LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp1, Idx);
+        SDOperand RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp2, Idx);
+        Ops.push_back(DAG.getNode(Node->getOpcode(), EltVT, LHS, RHS));
+      }
+      Result = DAG.getNode(ISD::BUILD_VECTOR, Node->getValueType(0), 
+                           &Ops[0], Ops.size());
+      break;
+    }
+    case TargetLowering::Promote: {
+      switch (Node->getOpcode()) {
+      default:  assert(0 && "Do not know how to promote this BinOp!");
+      case ISD::AND:
+      case ISD::OR:
+      case ISD::XOR: {
+        MVT::ValueType OVT = Node->getValueType(0);
+        MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+        assert(MVT::isVector(OVT) && "Cannot promote this BinOp!");
+        // Bit convert each of the values to the new type.
+        Tmp1 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp1);
+        Tmp2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp2);
+        Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+        // Bit convert the result back to the original type.
+        Result = DAG.getNode(ISD::BIT_CONVERT, OVT, Result);
+        break;
+      }
+      }
+    }
+    }
+    break;
+    
+  case ISD::FCOPYSIGN:  // FCOPYSIGN does not require LHS/RHS to match type!
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // LHS
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+      case Expand: assert(0 && "Not possible");
+      case Legal:
+        Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the RHS.
+        break;
+      case Promote:
+        Tmp2 = PromoteOp(Node->getOperand(1));  // Promote the RHS.
+        break;
+    }
+      
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default: assert(0 && "Operation not supported");
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Legal: break;
+    case TargetLowering::Expand: {
+      // If this target supports fabs/fneg natively and select is cheap,
+      // do this efficiently.
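+      // That is, compute copysign(X, Y) as: bits of Y, viewed as a signed
+      // integer, < 0  ?  -fabs(X)  :  fabs(X).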
+      if (!TLI.isSelectExpensive() &&
+          TLI.getOperationAction(ISD::FABS, Tmp1.getValueType()) ==
+          TargetLowering::Legal &&
+          TLI.getOperationAction(ISD::FNEG, Tmp1.getValueType()) ==
+          TargetLowering::Legal) {
+        // Get the sign bit of the RHS.
+        MVT::ValueType IVT = 
+          Tmp2.getValueType() == MVT::f32 ? MVT::i32 : MVT::i64;
+        SDOperand SignBit = DAG.getNode(ISD::BIT_CONVERT, IVT, Tmp2);
+        SignBit = DAG.getSetCC(TLI.getSetCCResultTy(),
+                               SignBit, DAG.getConstant(0, IVT), ISD::SETLT);
+        // Get the absolute value of the result.
+        SDOperand AbsVal = DAG.getNode(ISD::FABS, Tmp1.getValueType(), Tmp1);
+        // Select between the nabs and abs value based on the sign bit of
+        // the input.
+        Result = DAG.getNode(ISD::SELECT, AbsVal.getValueType(), SignBit,
+                             DAG.getNode(ISD::FNEG, AbsVal.getValueType(), 
+                                         AbsVal),
+                             AbsVal);
+        Result = LegalizeOp(Result);
+        break;
+      }
+      
+      // Otherwise, do bitwise ops!
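+      // The helper builds the classic integer form of copysign, along the
+      // lines of (bits(X) & ~SignMask) | (bits(Y) & SignMask); the integer
+      // result is bit converted back to FP below.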
+      MVT::ValueType NVT = 
+        Node->getValueType(0) == MVT::f32 ? MVT::i32 : MVT::i64;
+      Result = ExpandFCOPYSIGNToBitwiseOps(Node, NVT, DAG, TLI);
+      Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), Result);
+      Result = LegalizeOp(Result);
+      break;
+    }
+    }
+    break;
+    
+  case ISD::ADDC:
+  case ISD::SUBC:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    // Since this produces two values, make sure to remember that we legalized
+    // both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result;
+
+  case ISD::ADDE:
+  case ISD::SUBE:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    Tmp3 = LegalizeOp(Node->getOperand(2));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+    // Since this produces two values, make sure to remember that we legalized
+    // both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result;
+    
+  case ISD::BUILD_PAIR: {
+    MVT::ValueType PairTy = Node->getValueType(0);
+    // TODO: handle the case where the Lo and Hi operands are not of legal type
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // Lo
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // Hi
+    switch (TLI.getOperationAction(ISD::BUILD_PAIR, PairTy)) {
+    case TargetLowering::Promote:
+    case TargetLowering::Custom:
+      assert(0 && "Cannot promote/custom this yet!");
+    case TargetLowering::Legal:
+      if (Tmp1 != Node->getOperand(0) || Tmp2 != Node->getOperand(1))
+        Result = DAG.getNode(ISD::BUILD_PAIR, PairTy, Tmp1, Tmp2);
+      break;
+    case TargetLowering::Expand:
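+      // BUILD_PAIR(Lo, Hi) is materialized as zext(Lo) | (anyext(Hi) <<
+      // half-width); e.g. for an i64 pair, (i64)Lo | ((i64)Hi << 32).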
+      Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, PairTy, Tmp1);
+      Tmp2 = DAG.getNode(ISD::ANY_EXTEND, PairTy, Tmp2);
+      Tmp2 = DAG.getNode(ISD::SHL, PairTy, Tmp2,
+                         DAG.getConstant(MVT::getSizeInBits(PairTy)/2, 
+                                         TLI.getShiftAmountTy()));
+      Result = DAG.getNode(ISD::OR, PairTy, Tmp1, Tmp2);
+      break;
+    }
+    break;
+  }
+
+  case ISD::UREM:
+  case ISD::SREM:
+  case ISD::FREM:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // LHS
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // RHS
+
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    case TargetLowering::Promote: assert(0 && "Cannot promote this yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand:
+      unsigned DivOpc= (Node->getOpcode() == ISD::UREM) ? ISD::UDIV : ISD::SDIV;
+      bool isSigned = DivOpc == ISD::SDIV;
+      if (MVT::isInteger(Node->getValueType(0))) {
+        if (TLI.getOperationAction(DivOpc, Node->getValueType(0)) ==
+            TargetLowering::Legal) {
+          // X % Y -> X - (X/Y)*Y
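+          // e.g. 7 % 3: 7/3 = 2, 2*3 = 6, and 7-6 = 1.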
+          MVT::ValueType VT = Node->getValueType(0);
+          Result = DAG.getNode(DivOpc, VT, Tmp1, Tmp2);
+          Result = DAG.getNode(ISD::MUL, VT, Result, Tmp2);
+          Result = DAG.getNode(ISD::SUB, VT, Tmp1, Result);
+        } else {
+          assert(Node->getValueType(0) == MVT::i32 &&
+                 "Cannot expand this binary operator!");
+          RTLIB::Libcall LC = Node->getOpcode() == ISD::UREM
+            ? RTLIB::UREM_I32 : RTLIB::SREM_I32;
+          SDOperand Dummy;
+          Result = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Dummy);
+        }
+      } else {
+        // Floating point mod -> fmod libcall.
+        RTLIB::Libcall LC = Node->getValueType(0) == MVT::f32
+          ? RTLIB::REM_F32 : RTLIB::REM_F64;
+        SDOperand Dummy;
+        Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                               false/*sign irrelevant*/, Dummy);
+      }
+      break;
+    }
+    break;
+  case ISD::VAARG: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the pointer.
+
+    MVT::ValueType VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+      Result = Result.getValue(0);
+      Tmp1 = Result.getValue(1);
+
+      if (isCustom) {
+        Tmp2 = TLI.LowerOperation(Result, DAG);
+        if (Tmp2.Val) {
+          Result = LegalizeOp(Tmp2);
+          Tmp1 = LegalizeOp(Tmp2.getValue(1));
+        }
+      }
+      break;
+    case TargetLowering::Expand: {
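+      // Generic va_arg lowering: load the current va_list cursor, advance
+      // it past one argument slot, store the new cursor back, and load the
+      // argument from the old cursor.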
+      SrcValueSDNode *SV = cast<SrcValueSDNode>(Node->getOperand(2));
+      SDOperand VAList = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp2,
+                                     SV->getValue(), SV->getOffset());
+      // Increment the pointer, VAList, to the next vaarg
+      Tmp3 = DAG.getNode(ISD::ADD, TLI.getPointerTy(), VAList, 
+                         DAG.getConstant(MVT::getSizeInBits(VT)/8, 
+                                         TLI.getPointerTy()));
+      // Store the incremented VAList to the legalized pointer
+      Tmp3 = DAG.getStore(VAList.getValue(1), Tmp3, Tmp2, SV->getValue(),
+                          SV->getOffset());
+      // Load the actual argument out of the pointer VAList
+      Result = DAG.getLoad(VT, Tmp3, VAList, NULL, 0);
+      Tmp1 = LegalizeOp(Result.getValue(1));
+      Result = LegalizeOp(Result);
+      break;
+    }
+    }
+    // Since VAARG produces two values, make sure to remember that we 
+    // legalized both of them.
+    AddLegalizedOperand(SDOperand(Node, 0), Result);
+    AddLegalizedOperand(SDOperand(Node, 1), Tmp1);
+    return Op.ResNo ? Tmp1 : Result;
+  }
+    
+  case ISD::VACOPY: 
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the dest pointer.
+    Tmp3 = LegalizeOp(Node->getOperand(2));  // Legalize the source pointer.
+
+    switch (TLI.getOperationAction(ISD::VACOPY, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3,
+                                      Node->getOperand(3), Node->getOperand(4));
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand:
+      // This defaults to loading a pointer from the input and storing it to the
+      // output, returning the chain.
+      SrcValueSDNode *SVD = cast<SrcValueSDNode>(Node->getOperand(3));
+      SrcValueSDNode *SVS = cast<SrcValueSDNode>(Node->getOperand(4));
+      Tmp4 = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp3, SVD->getValue(),
+                         SVD->getOffset());
+      Result = DAG.getStore(Tmp4.getValue(1), Tmp4, Tmp2, SVS->getValue(),
+                            SVS->getOffset());
+      break;
+    }
+    break;
+
+  case ISD::VAEND: 
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the pointer.
+
+    switch (TLI.getOperationAction(ISD::VAEND, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand:
+      Result = Tmp1; // Default to a no-op, return the chain
+      break;
+    }
+    break;
+    
+  case ISD::VASTART: 
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    Tmp2 = LegalizeOp(Node->getOperand(1));  // Legalize the pointer.
+
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+    
+    switch (TLI.getOperationAction(ISD::VASTART, MVT::Other)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    }
+    break;
+    
+  case ISD::ROTL:
+  case ISD::ROTR:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // LHS
+    Tmp2 = LegalizeOp(Node->getOperand(1));   // RHS
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default:
+      assert(0 && "ROTL/ROTR legalize operation not supported");
+      break;
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      break;
+    case TargetLowering::Promote:
+      assert(0 && "Do not know how to promote ROTL/ROTR");
+      break;
+    case TargetLowering::Expand:
+      assert(0 && "Do not know how to expand ROTL/ROTR");
+      break;
+    }
+    break;
+    
+  case ISD::BSWAP:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // Op
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    case TargetLowering::Custom:
+      assert(0 && "Cannot custom legalize this yet!");
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    case TargetLowering::Promote: {
+      MVT::ValueType OVT = Tmp1.getValueType();
+      MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+      unsigned DiffBits = MVT::getSizeInBits(NVT) - MVT::getSizeInBits(OVT);
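+      // e.g. promoting an i16 bswap to i32: 0x1234 zero extends to
+      // 0x00001234, the i32 bswap gives 0x34120000, and the SRL by
+      // DiffBits (16) produces 0x00003412, the desired i16 result.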
+
+      Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
+      Tmp1 = DAG.getNode(ISD::BSWAP, NVT, Tmp1);
+      Result = DAG.getNode(ISD::SRL, NVT, Tmp1,
+                           DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+      break;
+    }
+    case TargetLowering::Expand:
+      Result = ExpandBSWAP(Tmp1);
+      break;
+    }
+    break;
+    
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+    Tmp1 = LegalizeOp(Node->getOperand(0));   // Op
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    case TargetLowering::Custom: assert(0 && "Cannot custom handle this yet!");
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    case TargetLowering::Promote: {
+      MVT::ValueType OVT = Tmp1.getValueType();
+      MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+
+      // Zero extend the argument.
+      Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
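+      // The zero extension is what makes the fixups below valid: the
+      // padding bits are known zero, so CTPOP is already exact, CTTZ only
+      // misfires when the original value was zero, and CTLZ merely
+      // over-counts by the number of padding bits.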
+      // Perform the larger operation, then subtract if needed.
+      Tmp1 = DAG.getNode(Node->getOpcode(), Node->getValueType(0), Tmp1);
+      switch (Node->getOpcode()) {
+      case ISD::CTPOP:
+        Result = Tmp1;
+        break;
+      case ISD::CTTZ:
+        // If Tmp1 == sizeinbits(NVT), then Tmp1 = sizeinbits(Old VT).
+        Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1,
+                            DAG.getConstant(MVT::getSizeInBits(NVT), NVT),
+                            ISD::SETEQ);
+        Result = DAG.getNode(ISD::SELECT, NVT, Tmp2,
+                           DAG.getConstant(MVT::getSizeInBits(OVT),NVT), Tmp1);
+        break;
+      case ISD::CTLZ:
+        // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+        Result = DAG.getNode(ISD::SUB, NVT, Tmp1,
+                             DAG.getConstant(MVT::getSizeInBits(NVT) -
+                                             MVT::getSizeInBits(OVT), NVT));
+        break;
+      }
+      break;
+    }
+    case TargetLowering::Expand:
+      Result = ExpandBitCount(Node->getOpcode(), Tmp1);
+      break;
+    }
+    break;
+
+    // Unary operators
+  case ISD::FABS:
+  case ISD::FNEG:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    case TargetLowering::Promote:
+    case TargetLowering::Custom:
+      isCustom = true;
+      // FALLTHROUGH
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      if (isCustom) {
+        Tmp1 = TLI.LowerOperation(Result, DAG);
+        if (Tmp1.Val) Result = Tmp1;
+      }
+      break;
+    case TargetLowering::Expand:
+      switch (Node->getOpcode()) {
+      default: assert(0 && "Unreachable!");
+      case ISD::FNEG:
+        // Expand Y = FNEG(X) ->  Y = SUB -0.0, X
+        Tmp2 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+        Result = DAG.getNode(ISD::FSUB, Node->getValueType(0), Tmp2, Tmp1);
+        break;
+      case ISD::FABS: {
+        // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
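+        // The unordered compare (SETUGT) sends NaN down the true arm, so a
+        // NaN input is passed through unchanged.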
+        MVT::ValueType VT = Node->getValueType(0);
+        Tmp2 = DAG.getConstantFP(0.0, VT);
+        Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1, Tmp2, ISD::SETUGT);
+        Tmp3 = DAG.getNode(ISD::FNEG, VT, Tmp1);
+        Result = DAG.getNode(ISD::SELECT, VT, Tmp2, Tmp1, Tmp3);
+        break;
+      }
+      case ISD::FSQRT:
+      case ISD::FSIN:
+      case ISD::FCOS: {
+        MVT::ValueType VT = Node->getValueType(0);
+        RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+        switch(Node->getOpcode()) {
+        case ISD::FSQRT:
+          LC = VT == MVT::f32 ? RTLIB::SQRT_F32 : RTLIB::SQRT_F64;
+          break;
+        case ISD::FSIN:
+          LC = VT == MVT::f32 ? RTLIB::SIN_F32 : RTLIB::SIN_F64;
+          break;
+        case ISD::FCOS:
+          LC = VT == MVT::f32 ? RTLIB::COS_F32 : RTLIB::COS_F64;
+          break;
+        default: assert(0 && "Unreachable!");
+        }
+        SDOperand Dummy;
+        Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                               false/*sign irrelevant*/, Dummy);
+        break;
+      }
+      }
+      break;
+    }
+    break;
+  case ISD::FPOWI: {
+    // We always lower FPOWI into a libcall.  No target supports it yet.
+    RTLIB::Libcall LC = Node->getValueType(0) == MVT::f32
+      ? RTLIB::POWI_F32 : RTLIB::POWI_F64;
+    SDOperand Dummy;
+    Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                           false/*sign irrelevant*/, Dummy);
+    break;
+  }
+  case ISD::BIT_CONVERT:
+    if (!isTypeLegal(Node->getOperand(0).getValueType())) {
+      Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0));
+    } else if (MVT::isVector(Op.getOperand(0).getValueType())) {
+      // The input has to be a vector type; we have to either scalarize it,
+      // pack it, or convert it, based on whether the input vector type is
+      // legal.
+      SDNode *InVal = Node->getOperand(0).Val;
+      unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0));
+      MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0));
+    
+      // Figure out if there is a simple type corresponding to this Vector
+      // type.  If so, convert to the vector type.
+      MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems);
+      if (TLI.isTypeLegal(TVT)) {
+        // Turn this into a bit convert of the vector input.
+        Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), 
+                             LegalizeOp(Node->getOperand(0)));
+        break;
+      } else if (NumElems == 1) {
+        // Turn this into a bit convert of the scalar input.
+        Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), 
+                             ScalarizeVectorOp(Node->getOperand(0)));
+        break;
+      } else {
+        // FIXME: UNIMP!  Store then reload
+        assert(0 && "Cast from unsupported vector type not implemented yet!");
+      }
+    } else {
+      switch (TLI.getOperationAction(ISD::BIT_CONVERT,
+                                     Node->getOperand(0).getValueType())) {
+      default: assert(0 && "Unknown operation action!");
+      case TargetLowering::Expand:
+        Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0));
+        break;
+      case TargetLowering::Legal:
+        Tmp1 = LegalizeOp(Node->getOperand(0));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1);
+        break;
+      }
+    }
+    break;
+      
+    // Conversion operators.  The source and destination have different types.
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP: {
+    bool isSigned = Node->getOpcode() == ISD::SINT_TO_FP;
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      switch (TLI.getOperationAction(Node->getOpcode(),
+                                     Node->getOperand(0).getValueType())) {
+      default: assert(0 && "Unknown operation action!");
+      case TargetLowering::Custom:
+        isCustom = true;
+        // FALLTHROUGH
+      case TargetLowering::Legal:
+        Tmp1 = LegalizeOp(Node->getOperand(0));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1);
+        if (isCustom) {
+          Tmp1 = TLI.LowerOperation(Result, DAG);
+          if (Tmp1.Val) Result = Tmp1;
+        }
+        break;
+      case TargetLowering::Expand:
+        Result = ExpandLegalINT_TO_FP(isSigned,
+                                      LegalizeOp(Node->getOperand(0)),
+                                      Node->getValueType(0));
+        break;
+      case TargetLowering::Promote:
+        Result = PromoteLegalINT_TO_FP(LegalizeOp(Node->getOperand(0)),
+                                       Node->getValueType(0),
+                                       isSigned);
+        break;
+      }
+      break;
+    case Expand:
+      Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP,
+                             Node->getValueType(0), Node->getOperand(0));
+      break;
+    case Promote:
+      Tmp1 = PromoteOp(Node->getOperand(0));
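+      // PromoteOp makes no guarantee about the top bits, so normalize them
+      // to the source type's signedness before the conversion reads the
+      // whole register.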
+      if (isSigned) {
+        Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, Tmp1.getValueType(),
+                 Tmp1, DAG.getValueType(Node->getOperand(0).getValueType()));
+      } else {
+        Tmp1 = DAG.getZeroExtendInReg(Tmp1,
+                                      Node->getOperand(0).getValueType());
+      }
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      Result = LegalizeOp(Result);  // The 'op' is not necessarily legal!
+      break;
+    }
+    break;
+  }
+  case ISD::TRUNCATE:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    case Expand:
+      ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+
+      // Since the result is legal, we should just be able to truncate the low
+      // part of the source.
+      Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Tmp1);
+      break;
+    case Promote:
+      Result = PromoteOp(Node->getOperand(0));
+      Result = DAG.getNode(ISD::TRUNCATE, Op.getValueType(), Result);
+      break;
+    }
+    break;
+
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+
+      switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))){
+      default: assert(0 && "Unknown operation action!");
+      case TargetLowering::Custom:
+        isCustom = true;
+        // FALLTHROUGH
+      case TargetLowering::Legal:
+        Result = DAG.UpdateNodeOperands(Result, Tmp1);
+        if (isCustom) {
+          Tmp1 = TLI.LowerOperation(Result, DAG);
+          if (Tmp1.Val) Result = Tmp1;
+        }
+        break;
+      case TargetLowering::Promote:
+        Result = PromoteLegalFP_TO_INT(Tmp1, Node->getValueType(0),
+                                       Node->getOpcode() == ISD::FP_TO_SINT);
+        break;
+      case TargetLowering::Expand:
+        if (Node->getOpcode() == ISD::FP_TO_UINT) {
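+          // Sketch of the trick: with Pivot = 2^(bits(NVT)-1), inputs below
+          // Pivot fit in a signed convert; inputs >= Pivot are converted as
+          // x - Pivot and the top bit is restored with the XOR below.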
+          SDOperand True, False;
+          MVT::ValueType VT = Node->getOperand(0).getValueType();
+          MVT::ValueType NVT = Node->getValueType(0);
+          unsigned ShiftAmt = MVT::getSizeInBits(Node->getValueType(0))-1;
+          Tmp2 = DAG.getConstantFP((double)(1ULL << ShiftAmt), VT);
+          Tmp3 = DAG.getSetCC(TLI.getSetCCResultTy(),
+                            Node->getOperand(0), Tmp2, ISD::SETLT);
+          True = DAG.getNode(ISD::FP_TO_SINT, NVT, Node->getOperand(0));
+          False = DAG.getNode(ISD::FP_TO_SINT, NVT,
+                              DAG.getNode(ISD::FSUB, VT, Node->getOperand(0),
+                                          Tmp2));
+          False = DAG.getNode(ISD::XOR, NVT, False, 
+                              DAG.getConstant(1ULL << ShiftAmt, NVT));
+          Result = DAG.getNode(ISD::SELECT, NVT, Tmp3, True, False);
+          break;
+        } else {
+          assert(0 && "Do not know how to expand FP_TO_SINT yet!");
+        }
+        break;
+      }
+      break;
+    case Expand: {
+      // Convert f32 / f64 to i32 / i64.
+      MVT::ValueType VT = Op.getValueType();
+      RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+      switch (Node->getOpcode()) {
+      case ISD::FP_TO_SINT:
+        if (Node->getOperand(0).getValueType() == MVT::f32)
+          LC = (VT == MVT::i32)
+            ? RTLIB::FPTOSINT_F32_I32 : RTLIB::FPTOSINT_F32_I64;
+        else
+          LC = (VT == MVT::i32)
+            ? RTLIB::FPTOSINT_F64_I32 : RTLIB::FPTOSINT_F64_I64;
+        break;
+      case ISD::FP_TO_UINT:
+        if (Node->getOperand(0).getValueType() == MVT::f32)
+          LC = (VT == MVT::i32)
+            ? RTLIB::FPTOUINT_F32_I32 : RTLIB::FPTOUINT_F32_I64;
+        else
+          LC = (VT == MVT::i32)
+            ? RTLIB::FPTOUINT_F64_I32 : RTLIB::FPTOUINT_F64_I64;
+        break;
+      default: assert(0 && "Unreachable!");
+      }
+      SDOperand Dummy;
+      Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                             false/*sign irrelevant*/, Dummy);
+      break;
+    }
+    case Promote:
+      Tmp1 = PromoteOp(Node->getOperand(0));
+      Result = DAG.UpdateNodeOperands(Result, LegalizeOp(Tmp1));
+      Result = LegalizeOp(Result);
+      break;
+    }
+    break;
+
+  case ISD::FP_ROUND:
+    if (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)) == 
+        TargetLowering::Expand) {
+      // The only way we can lower this is to turn it into a TRUNCSTORE,
+      // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+      // NOTE: there is a choice here between constantly creating new stack
+      // slots and always reusing the same one.  We currently always create
+      // new ones, as reuse may inhibit scheduling.
+      MVT::ValueType VT = Op.getValueType();    // The rounded-to FP type.
+      const Type *Ty = MVT::getTypeForValueType(VT);
+      uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+      unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+      MachineFunction &MF = DAG.getMachineFunction();
+      int SSFI =
+        MF.getFrameInfo()->CreateStackObject(TySize, Align);
+      SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+      Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0),
+                                 StackSlot, NULL, 0, VT);
+      Result = DAG.getLoad(VT, Result, StackSlot, NULL, 0, VT);
+      break;
+    }
+    // FALL THROUGH
+  case ISD::ANY_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::FP_EXTEND:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Expand: assert(0 && "Shouldn't need to expand other operators here!");
+    case Legal:
+      Tmp1 = LegalizeOp(Node->getOperand(0));
+      Result = DAG.UpdateNodeOperands(Result, Tmp1);
+      break;
+    case Promote:
+      switch (Node->getOpcode()) {
+      case ISD::ANY_EXTEND:
+        Tmp1 = PromoteOp(Node->getOperand(0));
+        Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Tmp1);
+        break;
+      case ISD::ZERO_EXTEND:
+        Result = PromoteOp(Node->getOperand(0));
+        Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Result);
+        Result = DAG.getZeroExtendInReg(Result,
+                                        Node->getOperand(0).getValueType());
+        break;
+      case ISD::SIGN_EXTEND:
+        Result = PromoteOp(Node->getOperand(0));
+        Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Result);
+        Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+                             Result,
+                          DAG.getValueType(Node->getOperand(0).getValueType()));
+        break;
+      case ISD::FP_EXTEND:
+        Result = PromoteOp(Node->getOperand(0));
+        if (Result.getValueType() != Op.getValueType())
+          // Dynamically dead while we have only 2 FP types.
+          Result = DAG.getNode(ISD::FP_EXTEND, Op.getValueType(), Result);
+        break;
+      case ISD::FP_ROUND:
+        Result = PromoteOp(Node->getOperand(0));
+        Result = DAG.getNode(Node->getOpcode(), Op.getValueType(), Result);
+        break;
+      }
+    }
+    break;
+  case ISD::FP_ROUND_INREG:
+  case ISD::SIGN_EXTEND_INREG: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    MVT::ValueType ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+
+    // If this operation is not supported, convert it to a shl/shr or load/store
+    // pair.
+    switch (TLI.getOperationAction(Node->getOpcode(), ExtraVT)) {
+    default: assert(0 && "This action not supported for this op yet!");
+    case TargetLowering::Legal:
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+      break;
+    case TargetLowering::Expand:
+      // If this is an integer extend and shifts are supported, do that.
+      if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) {
+        // NOTE: we could fall back on load/store here too for targets without
+        // SAR.  However, it is doubtful that any exist.
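+        // e.g. sign-extending the low 8 bits of an i32: BitsDiff is 24, so
+        // SHL then SRA by 24 turns 0x000000FF into 0xFFFFFFFF (-1).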
+        unsigned BitsDiff = MVT::getSizeInBits(Node->getValueType(0)) -
+                            MVT::getSizeInBits(ExtraVT);
+        SDOperand ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
+        Result = DAG.getNode(ISD::SHL, Node->getValueType(0),
+                             Node->getOperand(0), ShiftCst);
+        Result = DAG.getNode(ISD::SRA, Node->getValueType(0),
+                             Result, ShiftCst);
+      } else if (Node->getOpcode() == ISD::FP_ROUND_INREG) {
+        // The only way we can lower this is to turn it into a TRUNCSTORE,
+        // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+        // NOTE: there is a choice here between constantly creating new stack
+        // slots and always reusing the same one.  We currently always create
+        // new ones, as reuse may inhibit scheduling.
+        const Type *Ty = MVT::getTypeForValueType(ExtraVT);
+        uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI =
+          MF.getFrameInfo()->CreateStackObject(TySize, Align);
+        SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0),
+                                   StackSlot, NULL, 0, ExtraVT);
+        Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0),
+                                Result, StackSlot, NULL, 0, ExtraVT);
+      } else {
+        assert(0 && "Unknown op");
+      }
+      break;
+    }
+    break;
+  }
+  }
+  
+  assert(Result.getValueType() == Op.getValueType() &&
+         "Bad legalization!");
+  
+  // Make sure that the generated code is itself legal.
+  if (Result != Op)
+    Result = LegalizeOp(Result);
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
+  return Result;
+}
+
+/// PromoteOp - Given an operation that produces a value in an invalid type,
+/// promote it to compute the value in a larger type.  The produced value will
+/// have the correct bits for the low portion of the register, but no guarantee
+/// is made about the top bits: it may be zero, sign-extended, or garbage.
+SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType NVT = TLI.getTypeToTransformTo(VT);
+  assert(getTypeAction(VT) == Promote &&
+         "Caller should expand or legalize operands that are not promotable!");
+  assert(NVT > VT && MVT::isInteger(NVT) == MVT::isInteger(VT) &&
+         "Cannot promote to smaller type!");
+
+  SDOperand Tmp1, Tmp2, Tmp3;
+  SDOperand Result;
+  SDNode *Node = Op.Val;
+
+  DenseMap<SDOperand, SDOperand>::iterator I = PromotedNodes.find(Op);
+  if (I != PromotedNodes.end()) return I->second;
+
+  switch (Node->getOpcode()) {
+  case ISD::CopyFromReg:
+    assert(0 && "CopyFromReg must be legal!");
+  default:
+#ifndef NDEBUG
+    cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to promote this operator!");
+    abort();
+  case ISD::UNDEF:
+    Result = DAG.getNode(ISD::UNDEF, NVT);
+    break;
+  case ISD::Constant:
+    if (VT != MVT::i1)
+      Result = DAG.getNode(ISD::SIGN_EXTEND, NVT, Op);
+    else
+      Result = DAG.getNode(ISD::ZERO_EXTEND, NVT, Op);
+    assert(isa<ConstantSDNode>(Result) && "Didn't constant fold extend?");
+    break;
+  case ISD::ConstantFP:
+    Result = DAG.getNode(ISD::FP_EXTEND, NVT, Op);
+    assert(isa<ConstantFPSDNode>(Result) && "Didn't constant fold fp_extend?");
+    break;
+
+  case ISD::SETCC:
+    assert(isTypeLegal(TLI.getSetCCResultTy()) && "SetCC type is not legal??");
+    Result = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(),Node->getOperand(0),
+                         Node->getOperand(1), Node->getOperand(2));
+    break;
+    
+  case ISD::TRUNCATE:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      Result = LegalizeOp(Node->getOperand(0));
+      assert(Result.getValueType() >= NVT &&
+             "This truncation doesn't make sense!");
+      if (Result.getValueType() > NVT)    // Truncate to NVT instead of VT
+        Result = DAG.getNode(ISD::TRUNCATE, NVT, Result);
+      break;
+    case Promote:
+      // The truncation is not required, because we don't guarantee anything
+      // about high bits anyway.
+      Result = PromoteOp(Node->getOperand(0));
+      break;
+    case Expand:
+      ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+      // Truncate the low part of the expanded value to the result type
+      Result = DAG.getNode(ISD::TRUNCATE, NVT, Tmp1);
+    }
+    break;
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Expand: assert(0 && "BUG: Smaller reg should have been promoted!");
+    case Legal:
+      // Input is legal?  Just do extend all the way to the larger type.
+      Result = DAG.getNode(Node->getOpcode(), NVT, Node->getOperand(0));
+      break;
+    case Promote:
+      // Promote the reg if it's smaller.
+      Result = PromoteOp(Node->getOperand(0));
+      // The high bits are not guaranteed to be anything.  Insert an extend.
+      if (Node->getOpcode() == ISD::SIGN_EXTEND)
+        Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Result,
+                         DAG.getValueType(Node->getOperand(0).getValueType()));
+      else if (Node->getOpcode() == ISD::ZERO_EXTEND)
+        Result = DAG.getZeroExtendInReg(Result,
+                                        Node->getOperand(0).getValueType());
+      break;
+    }
+    break;
+  case ISD::BIT_CONVERT:
+    Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0));
+    Result = PromoteOp(Result);
+    break;
+    
+  case ISD::FP_EXTEND:
+    assert(0 && "Case not implemented.  Dynamically dead with 2 FP types!");
+  case ISD::FP_ROUND:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Expand: assert(0 && "BUG: Cannot expand FP regs!");
+    case Promote:  assert(0 && "Unreachable with 2 FP types!");
+    case Legal:
+      // Input is legal?  Do an FP_ROUND_INREG.
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Node->getOperand(0),
+                           DAG.getValueType(VT));
+      break;
+    }
+    break;
+
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+      // No extra round required here.
+      Result = DAG.getNode(Node->getOpcode(), NVT, Node->getOperand(0));
+      break;
+
+    case Promote:
+      Result = PromoteOp(Node->getOperand(0));
+      if (Node->getOpcode() == ISD::SINT_TO_FP)
+        Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+                             Result,
+                         DAG.getValueType(Node->getOperand(0).getValueType()));
+      else
+        Result = DAG.getZeroExtendInReg(Result,
+                                        Node->getOperand(0).getValueType());
+      // No extra round required here.
+      Result = DAG.getNode(Node->getOpcode(), NVT, Result);
+      break;
+    case Expand:
+      Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, NVT,
+                             Node->getOperand(0));
+      // Round if we cannot tolerate excess precision.
+      if (NoExcessFPPrecision)
+        Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                             DAG.getValueType(VT));
+      break;
+    }
+    break;
+
+  case ISD::SIGN_EXTEND_INREG:
+    Result = PromoteOp(Node->getOperand(0));
+    Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Result, 
+                         Node->getOperand(1));
+    break;
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+    case Legal:
+    case Expand:
+      Tmp1 = Node->getOperand(0);
+      break;
+    case Promote:
+      // The input result is prerounded, so we don't have to do anything
+      // special.
+      Tmp1 = PromoteOp(Node->getOperand(0));
+      break;
+    }
+    // If we're promoting a UINT to a larger size, check to see if the new node
+    // will be legal.  If it isn't, check to see if FP_TO_SINT is legal, since
+    // we can use that instead.  This allows us to generate better code for
+    // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not
+    // legal, such as PowerPC.
+    if (Node->getOpcode() == ISD::FP_TO_UINT && 
+        !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+        (TLI.isOperationLegal(ISD::FP_TO_SINT, NVT) ||
+         TLI.getOperationAction(ISD::FP_TO_SINT, NVT)==TargetLowering::Custom)){
+      Result = DAG.getNode(ISD::FP_TO_SINT, NVT, Tmp1);
+    } else {
+      Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+    }
+    break;
+
+  case ISD::FABS:
+  case ISD::FNEG:
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    assert(Tmp1.getValueType() == NVT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+    // NOTE: we do not have to do any extra rounding here for
+    // NoExcessFPPrecision, because we know the input will have the appropriate
+    // precision, and these operations don't modify precision at all.
+    break;
+
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    assert(Tmp1.getValueType() == NVT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+    if (NoExcessFPPrecision)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+
+  case ISD::FPOWI: {
+    // Promote f32 powi to f64 powi.  Note that this could insert a libcall
+    // directly as well, which may be better.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    assert(Tmp1.getValueType() == NVT);
+    Result = DAG.getNode(ISD::FPOWI, NVT, Tmp1, Node->getOperand(1));
+    if (NoExcessFPPrecision)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+  }
+    
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+    // The input may have strange things in the top bits of the registers, but
+    // these operations don't care.  They may have weird bits going out, but
+    // that too is okay if they are integer operations.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp2 = PromoteOp(Node->getOperand(1));
+    assert(Tmp1.getValueType() == NVT && Tmp2.getValueType() == NVT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+    break;
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp2 = PromoteOp(Node->getOperand(1));
+    assert(Tmp1.getValueType() == NVT && Tmp2.getValueType() == NVT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+    
+    // Floating point operations will give excess precision that we may not be
+    // able to tolerate.  If we DO allow excess precision, just leave it,
+    // otherwise excise it.
+    // FIXME: Why would we need to round FP ops more than integer ones?
+    //     Is Round(Add(Add(A,B),C)) != Round(Add(Round(Add(A,B)), C))
+    if (NoExcessFPPrecision)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+
+  case ISD::SDIV:
+  case ISD::SREM:
+    // These operators require that their input be sign extended.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp2 = PromoteOp(Node->getOperand(1));
+    if (MVT::isInteger(NVT)) {
+      Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1,
+                         DAG.getValueType(VT));
+      Tmp2 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp2,
+                         DAG.getValueType(VT));
+    }
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+
+    // Perform FP_ROUND: this is probably overly pessimistic.
+    if (MVT::isFloatingPoint(NVT) && NoExcessFPPrecision)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::FCOPYSIGN:
+    // These operators require that their input be fp extended.
+    switch (getTypeAction(Node->getOperand(0).getValueType())) {
+      case Legal:
+        Tmp1 = LegalizeOp(Node->getOperand(0));
+        break;
+      case Promote:
+        Tmp1 = PromoteOp(Node->getOperand(0));
+        break;
+      case Expand:
+        assert(0 && "not implemented");
+    }
+    switch (getTypeAction(Node->getOperand(1).getValueType())) {
+      case Legal:
+        Tmp2 = LegalizeOp(Node->getOperand(1));
+        break;
+      case Promote:
+        Tmp2 = PromoteOp(Node->getOperand(1));
+        break;
+      case Expand:
+        assert(0 && "not implemented");
+    }
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+    
+    // Perform FP_ROUND: this is probably overly pessimistic.
+    if (NoExcessFPPrecision && Node->getOpcode() != ISD::FCOPYSIGN)
+      Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+                           DAG.getValueType(VT));
+    break;
+
+  case ISD::UDIV:
+  case ISD::UREM:
+    // These operators require that their input be zero extended.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp2 = PromoteOp(Node->getOperand(1));
+    assert(MVT::isInteger(NVT) && "Operators don't apply to FP!");
+    Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT);
+    Tmp2 = DAG.getZeroExtendInReg(Tmp2, VT);
+    Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+    break;
+
+  case ISD::SHL:
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Result = DAG.getNode(ISD::SHL, NVT, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::SRA:
+    // The input value must be properly sign extended.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1,
+                       DAG.getValueType(VT));
+    Result = DAG.getNode(ISD::SRA, NVT, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::SRL:
+    // The input value must be properly zero extended.
+    Tmp1 = PromoteOp(Node->getOperand(0));
+    Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT);
+    Result = DAG.getNode(ISD::SRL, NVT, Tmp1, Node->getOperand(1));
+    break;
+
+  case ISD::VAARG:
+    Tmp1 = Node->getOperand(0);   // Get the chain.
+    Tmp2 = Node->getOperand(1);   // Get the pointer.
+    if (TLI.getOperationAction(ISD::VAARG, VT) == TargetLowering::Custom) {
+      Tmp3 = DAG.getVAArg(VT, Tmp1, Tmp2, Node->getOperand(2));
+      Result = TLI.CustomPromoteOperation(Tmp3, DAG);
+    } else {
+      SrcValueSDNode *SV = cast<SrcValueSDNode>(Node->getOperand(2));
+      SDOperand VAList = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp2,
+                                     SV->getValue(), SV->getOffset());
+      // Increment the pointer, VAList, to the next vaarg
+      Tmp3 = DAG.getNode(ISD::ADD, TLI.getPointerTy(), VAList, 
+                         DAG.getConstant(MVT::getSizeInBits(VT)/8, 
+                                         TLI.getPointerTy()));
+      // Store the incremented VAList to the legalized pointer
+      Tmp3 = DAG.getStore(VAList.getValue(1), Tmp3, Tmp2, SV->getValue(),
+                          SV->getOffset());
+      // Load the actual argument out of the pointer VAList
+      Result = DAG.getExtLoad(ISD::EXTLOAD, NVT, Tmp3, VAList, NULL, 0, VT);
+    }
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+    break;
+
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(Node)
+      ? ISD::EXTLOAD : LD->getExtensionType();
+    Result = DAG.getExtLoad(ExtType, NVT,
+                            LD->getChain(), LD->getBasePtr(),
+                            LD->getSrcValue(), LD->getSrcValueOffset(),
+                            LD->getLoadedVT(),
+                            LD->isVolatile(),
+                            LD->getAlignment());
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+    break;
+  }
+  case ISD::SELECT:
+    Tmp2 = PromoteOp(Node->getOperand(1));   // Promote the true value.
+    Tmp3 = PromoteOp(Node->getOperand(2));   // Promote the false value.
+    Result = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), Tmp2, Tmp3);
+    break;
+  case ISD::SELECT_CC:
+    Tmp2 = PromoteOp(Node->getOperand(2));   // True
+    Tmp3 = PromoteOp(Node->getOperand(3));   // False
+    Result = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0),
+                         Node->getOperand(1), Tmp2, Tmp3, Node->getOperand(4));
+    break;
+  case ISD::BSWAP:
+    Tmp1 = Node->getOperand(0);
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
+    Tmp1 = DAG.getNode(ISD::BSWAP, NVT, Tmp1);
+    Result = DAG.getNode(ISD::SRL, NVT, Tmp1,
+                         DAG.getConstant(MVT::getSizeInBits(NVT) -
+                                         MVT::getSizeInBits(VT),
+                                         TLI.getShiftAmountTy()));
+    break;
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+    // Zero extend the argument
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Node->getOperand(0));
+    // Perform the larger operation, then subtract if needed.
+    Tmp1 = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+    switch(Node->getOpcode()) {
+    case ISD::CTPOP:
+      Result = Tmp1;
+      break;
+    case ISD::CTTZ:
+      // if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+      Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1,
+                          DAG.getConstant(MVT::getSizeInBits(NVT), NVT),
+                          ISD::SETEQ);
+      Result = DAG.getNode(ISD::SELECT, NVT, Tmp2,
+                           DAG.getConstant(MVT::getSizeInBits(VT), NVT), Tmp1);
+      break;
+    case ISD::CTLZ:
+      // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+      Result = DAG.getNode(ISD::SUB, NVT, Tmp1,
+                           DAG.getConstant(MVT::getSizeInBits(NVT) -
+                                           MVT::getSizeInBits(VT), NVT));
+      break;
+    }
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    Result = PromoteOp(ExpandEXTRACT_SUBVECTOR(Op));
+    break;
+  case ISD::EXTRACT_VECTOR_ELT:
+    Result = PromoteOp(ExpandEXTRACT_VECTOR_ELT(Op));
+    break;
+  }
+
+  assert(Result.Val && "Didn't set a result!");
+
+  // Make sure the result is itself legal.
+  Result = LegalizeOp(Result);
+  
+  // Remember that we promoted this!
+  AddPromotedOperand(Op, Result);
+  return Result;
+}
+
+/// ExpandEXTRACT_VECTOR_ELT - Expand an EXTRACT_VECTOR_ELT operation into
+/// a legal EXTRACT_VECTOR_ELT operation, scalar code, or memory traffic,
+/// based on the vector type. The return type of this matches the element type
+/// of the vector, which may not be legal for the target.
+SDOperand SelectionDAGLegalize::ExpandEXTRACT_VECTOR_ELT(SDOperand Op) {
+  // We know that operand #0 is the Vec vector.  If the index is a constant
+  // or if the invec is a supported hardware type, we can use it.  Otherwise,
+  // lower to a store then an indexed load.
+  SDOperand Vec = Op.getOperand(0);
+  SDOperand Idx = Op.getOperand(1);
+  
+  SDNode *InVal = Vec.Val;
+  MVT::ValueType TVT = InVal->getValueType(0);
+  unsigned NumElems = MVT::getVectorNumElements(TVT);
+  
+  switch (TLI.getOperationAction(ISD::EXTRACT_VECTOR_ELT, TVT)) {
+  default: assert(0 && "This action is not supported yet!");
+  case TargetLowering::Custom: {
+    Vec = LegalizeOp(Vec);
+    Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+    SDOperand Tmp3 = TLI.LowerOperation(Op, DAG);
+    if (Tmp3.Val)
+      return Tmp3;
+    break;
+  }
+  case TargetLowering::Legal:
+    if (isTypeLegal(TVT)) {
+      Vec = LegalizeOp(Vec);
+      Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+      Op = LegalizeOp(Op);
+    }
+    break;
+  case TargetLowering::Expand:
+    break;
+  }
+
+  if (NumElems == 1) {
+    // This must be an access of the only element.  Return it.
+    Op = ScalarizeVectorOp(Vec);
+  } else if (!TLI.isTypeLegal(TVT) && isa<ConstantSDNode>(Idx)) {
+    ConstantSDNode *CIdx = cast<ConstantSDNode>(Idx);
+    SDOperand Lo, Hi;
+    SplitVectorOp(Vec, Lo, Hi);
+    if (CIdx->getValue() < NumElems/2) {
+      Vec = Lo;
+    } else {
+      Vec = Hi;
+      Idx = DAG.getConstant(CIdx->getValue() - NumElems/2,
+                            Idx.getValueType());
+    }
+  
+    // It's now an extract from the appropriate high or low part.  Recurse.
+    Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+    Op = ExpandEXTRACT_VECTOR_ELT(Op);
+  } else {
+    // Store the value to a temporary stack slot, then LOAD the scalar
+    // element back out.
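+    // The element address is StackPtr + Idx * EltSize (in bytes); e.g.
+    // element 3 of a v4i32 is loaded from byte offset 12.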
+    SDOperand StackPtr = CreateStackTemporary(Vec.getValueType());
+    SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Vec, StackPtr, NULL, 0);
+
+    // Add the offset to the index.
+    unsigned EltSize = MVT::getSizeInBits(Op.getValueType())/8;
+    Idx = DAG.getNode(ISD::MUL, Idx.getValueType(), Idx,
+                      DAG.getConstant(EltSize, Idx.getValueType()));
+    StackPtr = DAG.getNode(ISD::ADD, Idx.getValueType(), Idx, StackPtr);
+
+    Op = DAG.getLoad(Op.getValueType(), Ch, StackPtr, NULL, 0);
+  }
+  return Op;
+}
+
+/// ExpandEXTRACT_SUBVECTOR - Expand an EXTRACT_SUBVECTOR operation.  For now
+/// we assume the operation can be split if it is not already legal.
+SDOperand SelectionDAGLegalize::ExpandEXTRACT_SUBVECTOR(SDOperand Op) {
+  // We know that operand #0 is the Vec vector.  For now we assume the index
+  // is a constant and that the extracted result is a supported hardware type.
+  SDOperand Vec = Op.getOperand(0);
+  SDOperand Idx = LegalizeOp(Op.getOperand(1));
+  
+  unsigned NumElems = MVT::getVectorNumElements(Vec.getValueType());
+  
+  if (NumElems == MVT::getVectorNumElements(Op.getValueType())) {
+    // The extract already has the desired vector length.  Return it.
+    return Vec;
+  }
+
+  ConstantSDNode *CIdx = cast<ConstantSDNode>(Idx);
+  SDOperand Lo, Hi;
+  SplitVectorOp(Vec, Lo, Hi);
+  if (CIdx->getValue() < NumElems/2) {
+    Vec = Lo;
+  } else {
+    Vec = Hi;
+    Idx = DAG.getConstant(CIdx->getValue() - NumElems/2, Idx.getValueType());
+  }
+  
+  // It's now an extract from the appropriate high or low part.  Recurse.
+  Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+  return ExpandEXTRACT_SUBVECTOR(Op);
+}
+
+/// LegalizeSetCCOperands - Attempts to create a legal LHS and RHS for a SETCC
+/// with condition CC on the current target.  This usually involves legalizing
+/// or promoting the arguments.  In the case where LHS and RHS must be expanded,
+/// there may be no choice but to create a new SetCC node to represent the
+/// legalized value of setcc lhs, rhs.  In this case, the value is returned in
+/// LHS, and the SDOperand returned in RHS has a nil SDNode value.
+void SelectionDAGLegalize::LegalizeSetCCOperands(SDOperand &LHS,
+                                                 SDOperand &RHS,
+                                                 SDOperand &CC) {
+  SDOperand Tmp1, Tmp2, Result;    
+  
+  switch (getTypeAction(LHS.getValueType())) {
+  case Legal:
+    Tmp1 = LegalizeOp(LHS);   // LHS
+    Tmp2 = LegalizeOp(RHS);   // RHS
+    break;
+  case Promote:
+    Tmp1 = PromoteOp(LHS);   // LHS
+    Tmp2 = PromoteOp(RHS);   // RHS
+
+    // If this is an FP compare, the operands have already been extended.
+    if (MVT::isInteger(LHS.getValueType())) {
+      MVT::ValueType VT = LHS.getValueType();
+      MVT::ValueType NVT = TLI.getTypeToTransformTo(VT);
+
+      // Otherwise, we have to insert explicit sign or zero extends.  Note
+      // that we could insert sign extends for ALL conditions, but zero extend
+      // is cheaper on many machines (an AND instead of two shifts), so prefer
+      // it.
+      switch (cast<CondCodeSDNode>(CC)->get()) {
+      default: assert(0 && "Unknown integer comparison!");
+      case ISD::SETEQ:
+      case ISD::SETNE:
+      case ISD::SETUGE:
+      case ISD::SETUGT:
+      case ISD::SETULE:
+      case ISD::SETULT:
+        // ALL of these operations will work if we either sign or zero extend
+        // the operands (including the unsigned comparisons!).  Zero extend is
+        // usually a simpler/cheaper operation, so prefer it.
+        Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT);
+        Tmp2 = DAG.getZeroExtendInReg(Tmp2, VT);
+        break;
+      case ISD::SETGE:
+      case ISD::SETGT:
+      case ISD::SETLT:
+      case ISD::SETLE:
+        Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1,
+                           DAG.getValueType(VT));
+        Tmp2 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp2,
+                           DAG.getValueType(VT));
+        break;
+      }
+    }
+    break;
+  case Expand: {
+    MVT::ValueType VT = LHS.getValueType();
+    if (VT == MVT::f32 || VT == MVT::f64) {
+      // Expand into one or more soft-fp libcall(s).
+      RTLIB::Libcall LC1, LC2 = RTLIB::UNKNOWN_LIBCALL;
+      switch (cast<CondCodeSDNode>(CC)->get()) {
+      case ISD::SETEQ:
+      case ISD::SETOEQ:
+        LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+        break;
+      case ISD::SETNE:
+      case ISD::SETUNE:
+        LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+        break;
+      case ISD::SETGE:
+      case ISD::SETOGE:
+        LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+        break;
+      case ISD::SETLT:
+      case ISD::SETOLT:
+        LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+        break;
+      case ISD::SETLE:
+      case ISD::SETOLE:
+        LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+        break;
+      case ISD::SETGT:
+      case ISD::SETOGT:
+        LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+        break;
+      case ISD::SETUO:
+        LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+        break;
+      case ISD::SETO:
+        LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+        break;
+      default:
+        LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
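+        // Each unordered comparison is (unordered || ordered-cmp): keep the
+        // "uo" call in LC1 and put the matching ordered comparison in LC2
+        // (SETONE instead uses OLT | OGT); the two setcc results are OR'd
+        // together below.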
+        switch (cast<CondCodeSDNode>(CC)->get()) {
+        case ISD::SETONE:
+          // SETONE = SETOLT | SETOGT
+          LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+          // Fallthrough
+        case ISD::SETUGT:
+          LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+          break;
+        case ISD::SETUGE:
+          LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+          break;
+        case ISD::SETULT:
+          LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+          break;
+        case ISD::SETULE:
+          LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+          break;
+        case ISD::SETUEQ:
+          LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+          break;
+        default: assert(0 && "Unsupported FP setcc!");
+        }
+      }
+      
+      SDOperand Dummy;
+      Tmp1 = ExpandLibCall(TLI.getLibcallName(LC1),
+                           DAG.getNode(ISD::MERGE_VALUES, VT, LHS, RHS).Val, 
+                           false /*sign irrelevant*/, Dummy);
+      Tmp2 = DAG.getConstant(0, MVT::i32);
+      CC = DAG.getCondCode(TLI.getCmpLibcallCC(LC1));
+      if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+        Tmp1 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), Tmp1, Tmp2, CC);
+        LHS = ExpandLibCall(TLI.getLibcallName(LC2),
+                            DAG.getNode(ISD::MERGE_VALUES, VT, LHS, RHS).Val, 
+                            false /*sign irrelevant*/, Dummy);
+        Tmp2 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), LHS, Tmp2,
+                           DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+        Tmp1 = DAG.getNode(ISD::OR, Tmp1.getValueType(), Tmp1, Tmp2);
+        Tmp2 = SDOperand();
+      }
+      LHS = Tmp1;
+      RHS = Tmp2;
+      return;
+    }
+
+    SDOperand LHSLo, LHSHi, RHSLo, RHSHi;
+    ExpandOp(LHS, LHSLo, LHSHi);
+    ExpandOp(RHS, RHSLo, RHSHi);    
+    switch (cast<CondCodeSDNode>(CC)->get()) {
+    case ISD::SETEQ:
+    case ISD::SETNE:
+      if (RHSLo == RHSHi)
+        if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo))
+          if (RHSCST->isAllOnesValue()) {
+            // Comparison to -1.
+            Tmp1 = DAG.getNode(ISD::AND, LHSLo.getValueType(), LHSLo, LHSHi);
+            Tmp2 = RHSLo;
+            break;
+          }
+
+      Tmp1 = DAG.getNode(ISD::XOR, LHSLo.getValueType(), LHSLo, RHSLo);
+      Tmp2 = DAG.getNode(ISD::XOR, LHSLo.getValueType(), LHSHi, RHSHi);
+      Tmp1 = DAG.getNode(ISD::OR, Tmp1.getValueType(), Tmp1, Tmp2);
+      Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+      break;
+    default:
+      // If this is a comparison of the sign bit, just look at the top part.
+      // X > -1,  x < 0
+      if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(RHS))
+        if ((cast<CondCodeSDNode>(CC)->get() == ISD::SETLT && 
+             CST->getValue() == 0) ||             // X < 0
+            (cast<CondCodeSDNode>(CC)->get() == ISD::SETGT &&
+             CST->isAllOnesValue())) {            // X > -1
+          Tmp1 = LHSHi;
+          Tmp2 = RHSHi;
+          break;
+        }
+
+      // FIXME: This generated code sucks.
+      ISD::CondCode LowCC;
+      ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+      switch (CCCode) {
+      default: assert(0 && "Unknown integer setcc!");
+      case ISD::SETLT:
+      case ISD::SETULT: LowCC = ISD::SETULT; break;
+      case ISD::SETGT:
+      case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+      case ISD::SETLE:
+      case ISD::SETULE: LowCC = ISD::SETULE; break;
+      case ISD::SETGE:
+      case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+      }
+
+      // Tmp1 = lo(op1) < lo(op2)   // Always unsigned comparison
+      // Tmp2 = hi(op1) < hi(op2)   // Signedness depends on operands
+      // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+      // NOTE: on targets without efficient SELECT of bools, we can always use
+      // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
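+      // For i1 values, B1=1 gives (B2)|(0)=B2 and B1=0 gives (0)|(B3)=B3, so
+      // the expansion matches the select exactly.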
+      TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL);
+      Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSLo, RHSLo, LowCC,
+                               false, DagCombineInfo);
+      if (!Tmp1.Val)
+        Tmp1 = DAG.getSetCC(TLI.getSetCCResultTy(), LHSLo, RHSLo, LowCC);
+      Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi,
+                               CCCode, false, DagCombineInfo);
+      if (!Tmp2.Val)
+        Tmp2 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), LHSHi, RHSHi, CC);
+      
+      ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.Val);
+      ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.Val);
+      if ((Tmp1C && Tmp1C->getValue() == 0) ||
+          (Tmp2C && Tmp2C->getValue() == 0 &&
+           (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+            CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+          (Tmp2C && Tmp2C->getValue() == 1 &&
+           (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+            CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+        // If the low part is known false, just return the high part:
+        // for LE / GE, if the high part is known false, ignore the low part;
+        // for LT / GT, if the high part is known true, ignore the low part.
+        Tmp1 = Tmp2;
+        Tmp2 = SDOperand();
+      } else {
+        Result = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi,
+                                   ISD::SETEQ, false, DagCombineInfo);
+        if (!Result.Val)
+          Result=DAG.getSetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi, ISD::SETEQ);
+        Result = LegalizeOp(DAG.getNode(ISD::SELECT, Tmp1.getValueType(),
+                                        Result, Tmp1, Tmp2));
+        Tmp1 = Result;
+        Tmp2 = SDOperand();
+      }
+    }
+  }
+  }
+  LHS = Tmp1;
+  RHS = Tmp2;
+}
+
+/// ExpandBIT_CONVERT - Expand a BIT_CONVERT node into a store/load combination.
+/// The resultant code need not be legal.  Note that SrcOp is the input operand
+/// to the BIT_CONVERT, not the BIT_CONVERT node itself.
+SDOperand SelectionDAGLegalize::ExpandBIT_CONVERT(MVT::ValueType DestVT, 
+                                                  SDOperand SrcOp) {
+  // Create the stack frame object.
+  SDOperand FIPtr = CreateStackTemporary(DestVT);
+  
+  // Emit a store to the stack slot.
+  SDOperand Store = DAG.getStore(DAG.getEntryNode(), SrcOp, FIPtr, NULL, 0);
+  // Result is a load from the stack slot.
+  return DAG.getLoad(DestVT, Store, FIPtr, NULL, 0);
+}
+
+SDOperand SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+  // Create a vector-sized/aligned stack slot, store the value to element #0,
+  // then load the whole vector back out.
+  SDOperand StackPtr = CreateStackTemporary(Node->getValueType(0));
+  SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Node->getOperand(0), StackPtr,
+                              NULL, 0);
+  return DAG.getLoad(Node->getValueType(0), Ch, StackPtr, NULL, 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDOperand SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+  
+  // If the only non-undef value is the low element, turn this into a 
+  // SCALAR_TO_VECTOR node.  If this is { X, X, X, X }, determine X.
+  unsigned NumElems = Node->getNumOperands();
+  bool isOnlyLowElement = true;
+  SDOperand SplatValue = Node->getOperand(0);
+  std::map<SDOperand, std::vector<unsigned> > Values;
+  Values[SplatValue].push_back(0);
+  bool isConstant = true;
+  if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+      SplatValue.getOpcode() != ISD::UNDEF)
+    isConstant = false;
+  
+  for (unsigned i = 1; i < NumElems; ++i) {
+    SDOperand V = Node->getOperand(i);
+    Values[V].push_back(i);
+    if (V.getOpcode() != ISD::UNDEF)
+      isOnlyLowElement = false;
+    if (SplatValue != V)
+      SplatValue = SDOperand(0,0);
+
+    // If this isn't a constant element or an undef, we can't use a constant
+    // pool load.
+    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+        V.getOpcode() != ISD::UNDEF)
+      isConstant = false;
+  }
+  
+  if (isOnlyLowElement) {
+    // If the low element is an undef too, then this whole thing is undef.
+    if (Node->getOperand(0).getOpcode() == ISD::UNDEF)
+      return DAG.getNode(ISD::UNDEF, Node->getValueType(0));
+    // Otherwise, turn this into a scalar_to_vector node.
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0),
+                       Node->getOperand(0));
+  }
+  
+  // If all elements are constants, create a load from the constant pool.
+  if (isConstant) {
+    MVT::ValueType VT = Node->getValueType(0);
+    const Type *OpNTy = 
+      MVT::getTypeForValueType(Node->getOperand(0).getValueType());
+    std::vector<Constant*> CV;
+    for (unsigned i = 0, e = NumElems; i != e; ++i) {
+      if (ConstantFPSDNode *V = 
+          dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+        CV.push_back(ConstantFP::get(OpNTy, V->getValue()));
+      } else if (ConstantSDNode *V = 
+                 dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+        CV.push_back(ConstantInt::get(OpNTy, V->getValue()));
+      } else {
+        assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+        CV.push_back(UndefValue::get(OpNTy));
+      }
+    }
+    Constant *CP = ConstantVector::get(CV);
+    SDOperand CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+    return DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+  }
+  
+  if (SplatValue.Val) {   // Splat of one value?
+    // Build the shuffle constant vector: <0, 0, 0, 0>
+    MVT::ValueType MaskVT = 
+      MVT::getIntVectorWithNumElements(NumElems);
+    SDOperand Zero = DAG.getConstant(0, MVT::getVectorElementType(MaskVT));
+    std::vector<SDOperand> ZeroVec(NumElems, Zero);
+    SDOperand SplatMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+                                      &ZeroVec[0], ZeroVec.size());
+
+    // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+    if (isShuffleLegal(Node->getValueType(0), SplatMask)) {
+      // Get the splatted value into the low element of a vector register.
+      SDOperand LowValVec = 
+        DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0), SplatValue);
+    
+      // Return shuffle(LowValVec, undef, <0,0,0,0>)
+      return DAG.getNode(ISD::VECTOR_SHUFFLE, Node->getValueType(0), LowValVec,
+                         DAG.getNode(ISD::UNDEF, Node->getValueType(0)),
+                         SplatMask);
+    }
+  }
+  
+  // If there are only two unique elements, we may be able to turn this into a
+  // vector shuffle.
+  if (Values.size() == 2) {
+    // Build the shuffle constant vector: e.g. <0, 4, 0, 4>
+    MVT::ValueType MaskVT = 
+      MVT::getIntVectorWithNumElements(NumElems);
+    std::vector<SDOperand> MaskVec(NumElems);
+    unsigned i = 0;
+    for (std::map<SDOperand,std::vector<unsigned> >::iterator I=Values.begin(),
+           E = Values.end(); I != E; ++I) {
+      for (std::vector<unsigned>::iterator II = I->second.begin(),
+             EE = I->second.end(); II != EE; ++II)
+        MaskVec[*II] = DAG.getConstant(i, MVT::getVectorElementType(MaskVT));
+      i += NumElems;
+    }
+    SDOperand ShuffleMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+                                        &MaskVec[0], MaskVec.size());
+
+    // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+    if (TLI.isOperationLegal(ISD::SCALAR_TO_VECTOR, Node->getValueType(0)) &&
+        isShuffleLegal(Node->getValueType(0), ShuffleMask)) {
+      SmallVector<SDOperand, 8> Ops;
+      for(std::map<SDOperand,std::vector<unsigned> >::iterator I=Values.begin(),
+            E = Values.end(); I != E; ++I) {
+        SDOperand Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0),
+                                   I->first);
+        Ops.push_back(Op);
+      }
+      Ops.push_back(ShuffleMask);
+
+      // Return shuffle(LoValVec, HiValVec, <0,4,0,4>)
+      return DAG.getNode(ISD::VECTOR_SHUFFLE, Node->getValueType(0), 
+                         &Ops[0], Ops.size());
+    }
+  }
+  
+  // Otherwise, we can't handle this case efficiently.  Allocate a sufficiently
+  // aligned object on the stack, store each element into it, then load
+  // the result as a vector.
+  MVT::ValueType VT = Node->getValueType(0);
+  // Create the stack frame object.
+  SDOperand FIPtr = CreateStackTemporary(VT);
+  
+  // Emit a store of each element to the stack slot.
+  SmallVector<SDOperand, 8> Stores;
+  unsigned TypeByteSize = 
+    MVT::getSizeInBits(Node->getOperand(0).getValueType())/8;
+  // Store (in the right endianness) the elements to memory.
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    
+    unsigned Offset = TypeByteSize*i;
+    
+    SDOperand Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr, Idx);
+    
+    Stores.push_back(DAG.getStore(DAG.getEntryNode(), Node->getOperand(i), Idx, 
+                                  NULL, 0));
+  }
+  
+  SDOperand StoreChain;
+  if (!Stores.empty())    // Not all undef elements?
+    StoreChain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                             &Stores[0], Stores.size());
+  else
+    StoreChain = DAG.getEntryNode();
+  
+  // Result is a load from the stack slot.
+  return DAG.getLoad(VT, StoreChain, FIPtr, NULL, 0);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDOperand SelectionDAGLegalize::CreateStackTemporary(MVT::ValueType VT) {
+  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  unsigned ByteSize = MVT::getSizeInBits(VT)/8;
+  const Type *Ty = MVT::getTypeForValueType(VT);
+  unsigned StackAlign = (unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty);
+  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign);
+  return DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
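+/// ExpandShiftParts - Expand a shift into the corresponding {SHL,SRA,SRL}_PARTS
+/// node, which computes both halves of the result from the expanded operand
+/// halves and the shift amount in a single operation.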
+void SelectionDAGLegalize::ExpandShiftParts(unsigned NodeOp,
+                                            SDOperand Op, SDOperand Amt,
+                                            SDOperand &Lo, SDOperand &Hi) {
+  // Expand the subcomponents.
+  SDOperand LHSL, LHSH;
+  ExpandOp(Op, LHSL, LHSH);
+
+  SDOperand Ops[] = { LHSL, LHSH, Amt };
+  MVT::ValueType VT = LHSL.getValueType();
+  Lo = DAG.getNode(NodeOp, DAG.getNodeValueTypes(VT, VT), 2, Ops, 3);
+  Hi = Lo.getValue(1);
+}
+
+
+/// ExpandShift - Try to find a clever way to expand this shift operation out to
+/// smaller elements.  If we can't find a way that is more efficient than a
+/// libcall on this target, return false.  Otherwise, return true with the
+/// low-parts expanded into Lo and Hi.
+bool SelectionDAGLegalize::ExpandShift(unsigned Opc, SDOperand Op,SDOperand Amt,
+                                       SDOperand &Lo, SDOperand &Hi) {
+  assert((Opc == ISD::SHL || Opc == ISD::SRA || Opc == ISD::SRL) &&
+         "This is not a shift!");
+
+  MVT::ValueType NVT = TLI.getTypeToTransformTo(Op.getValueType());
+  SDOperand ShAmt = LegalizeOp(Amt);
+  MVT::ValueType ShTy = ShAmt.getValueType();
+  unsigned VTBits = MVT::getSizeInBits(Op.getValueType());
+  unsigned NVTBits = MVT::getSizeInBits(NVT);
+
+  // Handle the case when Amt is an immediate.  Fully general non-constant
+  // amounts are currently broken and disabled; only the known-bits special
+  // cases below are handled.
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Amt.Val)) {
+    unsigned Cst = CN->getValue();
+    // Expand the incoming operand to be shifted, so that we have its parts
+    SDOperand InL, InH;
+    ExpandOp(Op, InL, InH);
+    switch(Opc) {
+    case ISD::SHL:
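+      // For illustration, expanding an i64 SHL with NVT=i32:
+      //   Cst=40: Lo = 0,          Hi = InL << 8
+      //   Cst=32: Lo = 0,          Hi = InL
+      //   Cst=12: Lo = InL << 12,  Hi = (InH << 12) | (InL >> 20)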
+      if (Cst > VTBits) {
+        Lo = DAG.getConstant(0, NVT);
+        Hi = DAG.getConstant(0, NVT);
+      } else if (Cst > NVTBits) {
+        Lo = DAG.getConstant(0, NVT);
+        Hi = DAG.getNode(ISD::SHL, NVT, InL, DAG.getConstant(Cst-NVTBits,ShTy));
+      } else if (Cst == NVTBits) {
+        Lo = DAG.getConstant(0, NVT);
+        Hi = InL;
+      } else {
+        Lo = DAG.getNode(ISD::SHL, NVT, InL, DAG.getConstant(Cst, ShTy));
+        Hi = DAG.getNode(ISD::OR, NVT,
+           DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(Cst, ShTy)),
+           DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(NVTBits-Cst, ShTy)));
+      }
+      return true;
+    case ISD::SRL:
+      if (Cst > VTBits) {
+        Lo = DAG.getConstant(0, NVT);
+        Hi = DAG.getConstant(0, NVT);
+      } else if (Cst > NVTBits) {
+        Lo = DAG.getNode(ISD::SRL, NVT, InH, DAG.getConstant(Cst-NVTBits,ShTy));
+        Hi = DAG.getConstant(0, NVT);
+      } else if (Cst == NVTBits) {
+        Lo = InH;
+        Hi = DAG.getConstant(0, NVT);
+      } else {
+        Lo = DAG.getNode(ISD::OR, NVT,
+           DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(Cst, ShTy)),
+           DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(NVTBits-Cst, ShTy)));
+        Hi = DAG.getNode(ISD::SRL, NVT, InH, DAG.getConstant(Cst, ShTy));
+      }
+      return true;
+    case ISD::SRA:
+      if (Cst > VTBits) {
+        Hi = Lo = DAG.getNode(ISD::SRA, NVT, InH,
+                              DAG.getConstant(NVTBits-1, ShTy));
+      } else if (Cst > NVTBits) {
+        Lo = DAG.getNode(ISD::SRA, NVT, InH,
+                           DAG.getConstant(Cst-NVTBits, ShTy));
+        Hi = DAG.getNode(ISD::SRA, NVT, InH,
+                              DAG.getConstant(NVTBits-1, ShTy));
+      } else if (Cst == NVTBits) {
+        Lo = InH;
+        Hi = DAG.getNode(ISD::SRA, NVT, InH,
+                              DAG.getConstant(NVTBits-1, ShTy));
+      } else {
+        Lo = DAG.getNode(ISD::OR, NVT,
+           DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(Cst, ShTy)),
+           DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(NVTBits-Cst, ShTy)));
+        Hi = DAG.getNode(ISD::SRA, NVT, InH, DAG.getConstant(Cst, ShTy));
+      }
+      return true;
+    }
+  }
+  
+  // Okay, the shift amount isn't constant.  However, if we can tell that it is
+  // >= NVTBits or < NVTBits, we can still simplify it, without knowing the
+  // actual value.
+  uint64_t Mask = NVTBits, KnownZero, KnownOne;
+  DAG.ComputeMaskedBits(Amt, Mask, KnownZero, KnownOne);
+  
+  // If we know that the high bit of the shift amount is one, then we can do
+  // this as a couple of simple shifts.
+  if (KnownOne & Mask) {
+    // Mask out the high bit, which we know is set.
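+    // If Amt is in [NVTBits, 2*NVTBits), then Amt & (NVTBits-1) is exactly
+    // Amt - NVTBits, the shift amount within the one part that matters.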
+    Amt = DAG.getNode(ISD::AND, Amt.getValueType(), Amt,
+                      DAG.getConstant(NVTBits-1, Amt.getValueType()));
+    
+    // Expand the incoming operand to be shifted, so that we have its parts
+    SDOperand InL, InH;
+    ExpandOp(Op, InL, InH);
+    switch(Opc) {
+    case ISD::SHL:
+      Lo = DAG.getConstant(0, NVT);              // Low part is zero.
+      Hi = DAG.getNode(ISD::SHL, NVT, InL, Amt); // High part from Lo part.
+      return true;
+    case ISD::SRL:
+      Hi = DAG.getConstant(0, NVT);              // Hi part is zero.
+      Lo = DAG.getNode(ISD::SRL, NVT, InH, Amt); // Lo part from Hi part.
+      return true;
+    case ISD::SRA:
+      Hi = DAG.getNode(ISD::SRA, NVT, InH,       // Sign extend high part.
+                       DAG.getConstant(NVTBits-1, Amt.getValueType()));
+      Lo = DAG.getNode(ISD::SRA, NVT, InH, Amt); // Lo part from Hi part.
+      return true;
+    }
+  }
+  
+  // If we know that the high bit of the shift amount is zero, then we can do
+  // this as a couple of simple shifts.
+  if (KnownZero & Mask) {
+    // Compute NVTBits - Amt.
+    SDOperand Amt2 = DAG.getNode(ISD::SUB, Amt.getValueType(),
+                                 DAG.getConstant(NVTBits, Amt.getValueType()),
+                                 Amt);
+    
+    // Expand the incoming operand to be shifted, so that we have its parts
+    SDOperand InL, InH;
+    ExpandOp(Op, InL, InH);
+    switch(Opc) {
+    case ISD::SHL:
+      Lo = DAG.getNode(ISD::SHL, NVT, InL, Amt);
+      Hi = DAG.getNode(ISD::OR, NVT,
+                       DAG.getNode(ISD::SHL, NVT, InH, Amt),
+                       DAG.getNode(ISD::SRL, NVT, InL, Amt2));
+      return true;
+    case ISD::SRL:
+      Hi = DAG.getNode(ISD::SRL, NVT, InH, Amt);
+      Lo = DAG.getNode(ISD::OR, NVT,
+                       DAG.getNode(ISD::SRL, NVT, InL, Amt),
+                       DAG.getNode(ISD::SHL, NVT, InH, Amt2));
+      return true;
+    case ISD::SRA:
+      Hi = DAG.getNode(ISD::SRA, NVT, InH, Amt);
+      Lo = DAG.getNode(ISD::OR, NVT,
+                       DAG.getNode(ISD::SRL, NVT, InL, Amt),
+                       DAG.getNode(ISD::SHL, NVT, InH, Amt2));
+      return true;
+    }
+  }
+  
+  return false;
+}
+
+
+// ExpandLibCall - Expand a node into a call to a libcall.  If the result value
+// does not fit into a register, return the lo part and pass the hi part back
+// through the by-reference Hi argument.  If it does fit into a single
+// register, return the result and leave the Hi part unset.
+SDOperand SelectionDAGLegalize::ExpandLibCall(const char *Name, SDNode *Node,
+                                              bool isSigned, SDOperand &Hi) {
+  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+  // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically add the previous call to the
+  // dependence chain.
+  SDOperand InChain = DAG.getEntryNode();
+  
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    MVT::ValueType ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = MVT::getTypeForValueType(ArgVT);
+    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; 
+    Entry.isSExt = isSigned;
+    Args.push_back(Entry);
+  }
+  SDOperand Callee = DAG.getExternalSymbol(Name, TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  const Type *RetTy = MVT::getTypeForValueType(Node->getValueType(0));
+  std::pair<SDOperand,SDOperand> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, false, CallingConv::C, false,
+                    Callee, Args, DAG);
+
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+  SDOperand Result;
+  switch (getTypeAction(CallInfo.first.getValueType())) {
+  default: assert(0 && "Unexpected type action!");
+  case Legal:
+    Result = CallInfo.first;
+    break;
+  case Expand:
+    ExpandOp(CallInfo.first, Result, Hi);
+    break;
+  }
+  return Result;
+}
+
+
+/// ExpandIntToFP - Expand a [US]INT_TO_FP operation.
+///
+SDOperand SelectionDAGLegalize::
+ExpandIntToFP(bool isSigned, MVT::ValueType DestTy, SDOperand Source) {
+  assert(getTypeAction(Source.getValueType()) == Expand &&
+         "This is not an expansion!");
+  assert(Source.getValueType() == MVT::i64 && "Only handle expand from i64!");
+
+  if (!isSigned) {
+    assert(Source.getValueType() == MVT::i64 &&
+           "This only works for 64-bit -> FP");
+    // The 64-bit value will be converted incorrectly if the 'sign bit' of the
+    // incoming integer is set.  To handle this, we dynamically test to see if
+    // it is set, and, if so, add a fudge factor.
+    SDOperand Lo, Hi;
+    ExpandOp(Source, Lo, Hi);
+
+    // If this is unsigned, and not supported, first perform the conversion to
+    // signed, then adjust the result if the sign bit is set.
+    SDOperand SignedConv = ExpandIntToFP(true, DestTy,
+                   DAG.getNode(ISD::BUILD_PAIR, Source.getValueType(), Lo, Hi));
+
+    SDOperand SignSet = DAG.getSetCC(TLI.getSetCCResultTy(), Hi,
+                                     DAG.getConstant(0, Hi.getValueType()),
+                                     ISD::SETLT);
+    SDOperand Zero = getIntPtrConstant(0), Four = getIntPtrConstant(4);
+    SDOperand CstOffset = DAG.getNode(ISD::SELECT, Zero.getValueType(),
+                                      SignSet, Four, Zero);
+    uint64_t FF = 0x5f800000ULL;  // 2^64 (as a float)
+    if (TLI.isLittleEndian()) FF <<= 32;
+    static Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
+    SDOperand CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+    CPIdx = DAG.getNode(ISD::ADD, TLI.getPointerTy(), CPIdx, CstOffset);
+    SDOperand FudgeInReg;
+    if (DestTy == MVT::f32)
+      FudgeInReg = DAG.getLoad(MVT::f32, DAG.getEntryNode(), CPIdx, NULL, 0);
+    else {
+      assert(DestTy == MVT::f64 && "Unexpected conversion");
+      // FIXME: Avoid the extend by constructing the right constant pool?
+      FudgeInReg = DAG.getExtLoad(ISD::EXTLOAD, MVT::f64, DAG.getEntryNode(),
+                                  CPIdx, NULL, 0, MVT::f32);
+    }
+    MVT::ValueType SCVT = SignedConv.getValueType();
+    if (SCVT != DestTy) {
+      // Destination type needs to be expanded as well.  The FADD we are now
+      // constructing will be expanded into a libcall.
+      if (MVT::getSizeInBits(SCVT) != MVT::getSizeInBits(DestTy)) {
+        assert(SCVT == MVT::i32 && DestTy == MVT::f64);
+        SignedConv = DAG.getNode(ISD::BUILD_PAIR, MVT::i64,
+                                 SignedConv, SignedConv.getValue(1));
+      }
+      SignedConv = DAG.getNode(ISD::BIT_CONVERT, DestTy, SignedConv);
+    }
+    return DAG.getNode(ISD::FADD, DestTy, SignedConv, FudgeInReg);
+  }
+
+  // Check to see if the target has a custom way to lower this.  If so, use it.
+  switch (TLI.getOperationAction(ISD::SINT_TO_FP, Source.getValueType())) {
+  default: assert(0 && "This action not implemented for this operation!");
+  case TargetLowering::Legal:
+  case TargetLowering::Expand:
+    break;   // This case is handled below.
+  case TargetLowering::Custom: {
+    SDOperand NV = TLI.LowerOperation(DAG.getNode(ISD::SINT_TO_FP, DestTy,
+                                                  Source), DAG);
+    if (NV.Val)
+      return LegalizeOp(NV);
+    break;   // The target decided this was legal after all
+  }
+  }
+
+  // Expand the source, then glue it back together for the call.  We must expand
+  // the source in case it is shared (this pass of legalize must traverse it).
+  SDOperand SrcLo, SrcHi;
+  ExpandOp(Source, SrcLo, SrcHi);
+  Source = DAG.getNode(ISD::BUILD_PAIR, Source.getValueType(), SrcLo, SrcHi);
+
+  RTLIB::Libcall LC;
+  if (DestTy == MVT::f32)
+    LC = RTLIB::SINTTOFP_I64_F32;
+  else {
+    assert(DestTy == MVT::f64 && "Unknown fp value type!");
+    LC = RTLIB::SINTTOFP_I64_F64;
+  }
+  
+  assert(TLI.getLibcallName(LC) && "Don't know how to expand this SINT_TO_FP!");
+  Source = DAG.getNode(ISD::SINT_TO_FP, DestTy, Source);
+  SDOperand UnusedHiPart;
+  return ExpandLibCall(TLI.getLibcallName(LC), Source.Val, isSigned,
+                       UnusedHiPart);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it.  At this point, we know that the result and operand types are
+/// legal for the target.
+SDOperand SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+                                                     SDOperand Op0,
+                                                     MVT::ValueType DestVT) {
+  if (Op0.getValueType() == MVT::i32) {
+    // simple 32-bit [signed|unsigned] integer to float/double expansion
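+    // The trick: an IEEE double whose high word is 0x43300000 and whose low
+    // word is X has the value 2^52 + X (X read as unsigned).  We build that
+    // double in a stack buffer, then subtract the bias 2^52 (2^52 + 2^31 in
+    // the signed case, undoing the sign-bit flip below) to recover X exactly.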
+    
+    // get the stack frame index of an 8-byte buffer, pessimistically aligned
+    MachineFunction &MF = DAG.getMachineFunction();
+    const Type *F64Type = MVT::getTypeForValueType(MVT::f64);
+    unsigned StackAlign =
+      (unsigned)TLI.getTargetData()->getPrefTypeAlignment(F64Type);
+    int SSFI = MF.getFrameInfo()->CreateStackObject(8, StackAlign);
+    // get the address of the 8-byte buffer
+    SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+    // word offset constant for Hi/Lo address computation
+    SDOperand WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+    // set up the Hi and Lo (into buffer) addresses based on endianness
+    SDOperand Hi = StackSlot;
+    SDOperand Lo = DAG.getNode(ISD::ADD, TLI.getPointerTy(), StackSlot,WordOff);
+    if (TLI.isLittleEndian())
+      std::swap(Hi, Lo);
+    
+    // if signed, map to unsigned space
+    SDOperand Op0Mapped;
+    if (isSigned) {
+      // constant used to invert sign bit (signed to unsigned mapping)
+      SDOperand SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+      Op0Mapped = DAG.getNode(ISD::XOR, MVT::i32, Op0, SignBit);
+    } else {
+      Op0Mapped = Op0;
+    }
+    // store the lo of the constructed double - based on integer input
+    SDOperand Store1 = DAG.getStore(DAG.getEntryNode(),
+                                    Op0Mapped, Lo, NULL, 0);
+    // initial hi portion of constructed double
+    SDOperand InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+    // store the hi of the constructed double - biased exponent
+    SDOperand Store2 = DAG.getStore(Store1, InitialHi, Hi, NULL, 0);
+    // load the constructed double
+    SDOperand Load = DAG.getLoad(MVT::f64, Store2, StackSlot, NULL, 0);
+    // FP constant to bias correct the final result
+    SDOperand Bias = DAG.getConstantFP(isSigned
+                                         ? BitsToDouble(0x4330000080000000ULL)
+                                         : BitsToDouble(0x4330000000000000ULL),
+                                       MVT::f64);
+    // subtract the bias
+    SDOperand Sub = DAG.getNode(ISD::FSUB, MVT::f64, Load, Bias);
+    // final result
+    SDOperand Result;
+    // handle final rounding
+    if (DestVT == MVT::f64) {
+      // do nothing
+      Result = Sub;
+    } else {
+      // otherwise round the f64 result down to f32
+      Result = DAG.getNode(ISD::FP_ROUND, MVT::f32, Sub);
+    }
+    return Result;
+  }
+  assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  SDOperand Tmp1 = DAG.getNode(ISD::SINT_TO_FP, DestVT, Op0);
+
+  SDOperand SignSet = DAG.getSetCC(TLI.getSetCCResultTy(), Op0,
+                                   DAG.getConstant(0, Op0.getValueType()),
+                                   ISD::SETLT);
+  SDOperand Zero = getIntPtrConstant(0), Four = getIntPtrConstant(4);
+  SDOperand CstOffset = DAG.getNode(ISD::SELECT, Zero.getValueType(),
+                                    SignSet, Four, Zero);
+
+  // If the sign bit of the integer is set, the large number will be treated
+  // as a negative number.  To counteract this, the dynamic code adds an
+  // offset depending on the data type.
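+  // The pool constant is an i64 whose 32-bit halves are 0.0f and 2^N-as-float;
+  // with the endianness adjustment below, offset 0 always holds 0.0f and
+  // offset 4 holds 2^N, so CstOffset applies the correction only when the
+  // sign bit was set.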
+  uint64_t FF;
+  switch (Op0.getValueType()) {
+  default: assert(0 && "Unsupported integer type!");
+  case MVT::i8 : FF = 0x43800000ULL; break;  // 2^8  (as a float)
+  case MVT::i16: FF = 0x47800000ULL; break;  // 2^16 (as a float)
+  case MVT::i32: FF = 0x4F800000ULL; break;  // 2^32 (as a float)
+  case MVT::i64: FF = 0x5F800000ULL; break;  // 2^64 (as a float)
+  }
+  if (TLI.isLittleEndian()) FF <<= 32;
+  // FF depends on the operand type, so it cannot be cached in a static.
+  Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
+  SDOperand CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+  CPIdx = DAG.getNode(ISD::ADD, TLI.getPointerTy(), CPIdx, CstOffset);
+  SDOperand FudgeInReg;
+  if (DestVT == MVT::f32)
+    FudgeInReg = DAG.getLoad(MVT::f32, DAG.getEntryNode(), CPIdx, NULL, 0);
+  else {
+    assert(DestVT == MVT::f64 && "Unexpected conversion");
+    FudgeInReg = LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, MVT::f64,
+                                           DAG.getEntryNode(), CPIdx,
+                                           NULL, 0, MVT::f32));
+  }
+
+  return DAG.getNode(ISD::FADD, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDOperand SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDOperand LegalOp,
+                                                      MVT::ValueType DestVT,
+                                                      bool isSigned) {
+  // First step, figure out the appropriate *INT_TO_FP operation to use.
+  MVT::ValueType NewInTy = LegalOp.getValueType();
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewInTy = (MVT::ValueType)(NewInTy+1);
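+    // Note: stepping NewInTy+1 relies on the integer types being consecutive
+    // and in increasing size order in the MVT enumeration.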
+    assert(MVT::isInteger(NewInTy) && "Ran out of possibilities!");
+
+    // If the target supports SINT_TO_FP of this type, use it.
+    switch (TLI.getOperationAction(ISD::SINT_TO_FP, NewInTy)) {
+      default: break;
+      case TargetLowering::Legal:
+        if (!TLI.isTypeLegal(NewInTy))
+          break;  // Can't use this datatype.
+        // FALL THROUGH.
+      case TargetLowering::Custom:
+        OpToUse = ISD::SINT_TO_FP;
+        break;
+    }
+    if (OpToUse) break;
+    if (isSigned) continue;
+
+    // If the target supports UINT_TO_FP of this type, use it.
+    switch (TLI.getOperationAction(ISD::UINT_TO_FP, NewInTy)) {
+      default: break;
+      case TargetLowering::Legal:
+        if (!TLI.isTypeLegal(NewInTy))
+          break;  // Can't use this datatype.
+        // FALL THROUGH.
+      case TargetLowering::Custom:
+        OpToUse = ISD::UINT_TO_FP;
+        break;
+    }
+    if (OpToUse) break;
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.  Sign- or zero-extend the
+  // input to the desired type, then run the operation on it.
+  return DAG.getNode(OpToUse, DestVT,
+                     DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+                                 NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDOperand SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDOperand LegalOp,
+                                                      MVT::ValueType DestVT,
+                                                      bool isSigned) {
+  // First step, figure out the appropriate FP_TO_*INT operation to use.
+  MVT::ValueType NewOutTy = DestVT;
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewOutTy = (MVT::ValueType)(NewOutTy+1);
+    assert(MVT::isInteger(NewOutTy) && "Ran out of possibilities!");
+
+    // If the target supports FP_TO_SINT returning this type, use it.
+    switch (TLI.getOperationAction(ISD::FP_TO_SINT, NewOutTy)) {
+    default: break;
+    case TargetLowering::Legal:
+      if (!TLI.isTypeLegal(NewOutTy))
+        break;  // Can't use this datatype.
+      // FALL THROUGH.
+    case TargetLowering::Custom:
+      OpToUse = ISD::FP_TO_SINT;
+      break;
+    }
+    if (OpToUse) break;
+
+    // If the target supports FP_TO_UINT of this type, use it.
+    switch (TLI.getOperationAction(ISD::FP_TO_UINT, NewOutTy)) {
+    default: break;
+    case TargetLowering::Legal:
+      if (!TLI.isTypeLegal(NewOutTy))
+        break;  // Can't use this datatype.
+      // FALL THROUGH.
+    case TargetLowering::Custom:
+      OpToUse = ISD::FP_TO_UINT;
+      break;
+    }
+    if (OpToUse) break;
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.  Truncate the result of the
+  // extended FP_TO_*INT operation to the desired size.
+  return DAG.getNode(ISD::TRUNCATE, DestVT,
+                     DAG.getNode(OpToUse, NewOutTy, LegalOp));
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
+///
+SDOperand SelectionDAGLegalize::ExpandBSWAP(SDOperand Op) {
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType SHVT = TLI.getShiftAmountTy();
+  SDOperand Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+  switch (VT) {
+  default: assert(0 && "Unhandled Expand type in BSWAP!"); abort();
+  case MVT::i16:
+    Tmp2 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+    return DAG.getNode(ISD::OR, VT, Tmp1, Tmp2);
+  case MVT::i32:
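+    // For illustration, with Op = 0xAABBCCDD: Tmp4 = 0xDD000000,
+    // Tmp3 = 0x00CC0000, Tmp2 = 0x0000BB00, Tmp1 = 0x000000AA,
+    // and OR'ing the four gives 0xDDCCBBAA.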
+    Tmp4 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::AND, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+    Tmp2 = DAG.getNode(ISD::AND, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+    Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, VT, Tmp2, Tmp1);
+    return DAG.getNode(ISD::OR, VT, Tmp4, Tmp2);
+  case MVT::i64:
+    Tmp8 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp6 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp5 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp4 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp3 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::AND, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+    Tmp6 = DAG.getNode(ISD::AND, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+    Tmp5 = DAG.getNode(ISD::AND, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+    Tmp4 = DAG.getNode(ISD::AND, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+    Tmp3 = DAG.getNode(ISD::AND, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+    Tmp2 = DAG.getNode(ISD::AND, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+    Tmp8 = DAG.getNode(ISD::OR, VT, Tmp8, Tmp7);
+    Tmp6 = DAG.getNode(ISD::OR, VT, Tmp6, Tmp5);
+    Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, VT, Tmp2, Tmp1);
+    Tmp8 = DAG.getNode(ISD::OR, VT, Tmp8, Tmp6);
+    Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp2);
+    return DAG.getNode(ISD::OR, VT, Tmp8, Tmp4);
+  }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDOperand SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDOperand Op) {
+  switch (Opc) {
+  default: assert(0 && "Cannot expand this yet!");
+  case ISD::CTPOP: {
+    static const uint64_t mask[6] = {
+      0x5555555555555555ULL, 0x3333333333333333ULL,
+      0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+      0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+    };
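+    // Classic parallel popcount: each pass sums adjacent bit groups, doubling
+    // the group width, so only the first log2(bitwidth) masks are used (e.g.
+    // masks 0-4 for an i32 input).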
+    MVT::ValueType VT = Op.getValueType();
+    MVT::ValueType ShVT = TLI.getShiftAmountTy();
+    unsigned len = MVT::getSizeInBits(VT);
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      // x = (x & mask[i]) + ((x >> (1 << i)) & mask[i])
+      SDOperand Tmp2 = DAG.getConstant(mask[i], VT);
+      SDOperand Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::ADD, VT, DAG.getNode(ISD::AND, VT, Op, Tmp2),
+                       DAG.getNode(ISD::AND, VT,
+                                   DAG.getNode(ISD::SRL, VT, Op, Tmp3),Tmp2));
+    }
+    return Op;
+  }
+  case ISD::CTLZ: {
+    // for now, we do this:
+    // x = x | (x >> 1);
+    // x = x | (x >> 2);
+    // ...
+    // x = x | (x >>16);
+    // x = x | (x >>32); // for 64-bit input
+    // return popcount(~x);
+    //
+    // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+    MVT::ValueType VT = Op.getValueType();
+    MVT::ValueType ShVT = TLI.getShiftAmountTy();
+    unsigned len = MVT::getSizeInBits(VT);
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      SDOperand Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::OR, VT, Op, DAG.getNode(ISD::SRL, VT, Op, Tmp3));
+    }
+    Op = DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(~0ULL, VT));
+    return DAG.getNode(ISD::CTPOP, VT, Op);
+  }
+  case ISD::CTTZ: {
+    // for now, we use: { return popcount(~x & (x - 1)); }
+    // unless the target has ctlz but not ctpop, in which case we use:
+    // { return bitwidth - nlz(~x & (x-1)); }
+    // see also http://www.hackersdelight.org/HDcode/ntz.cc
+    MVT::ValueType VT = Op.getValueType();
+    SDOperand Tmp2 = DAG.getConstant(~0ULL, VT);
+    SDOperand Tmp3 = DAG.getNode(ISD::AND, VT,
+                       DAG.getNode(ISD::XOR, VT, Op, Tmp2),
+                       DAG.getNode(ISD::SUB, VT, Op, DAG.getConstant(1, VT)));
+    // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+    if (!TLI.isOperationLegal(ISD::CTPOP, VT) &&
+        TLI.isOperationLegal(ISD::CTLZ, VT))
+      return DAG.getNode(ISD::SUB, VT,
+                         DAG.getConstant(MVT::getSizeInBits(VT), VT),
+                         DAG.getNode(ISD::CTLZ, VT, Tmp3));
+    return DAG.getNode(ISD::CTPOP, VT, Tmp3);
+  }
+  }
+}
+
+/// ExpandOp - Expand the specified SDOperand into its two component pieces
+/// Lo & Hi.  Note that Op MUST be of an expanded type.  As a result, the
+/// LegalizeNodes map is filled in for any results that are not expanded, the
+/// ExpandedNodes map is filled in for any results that are expanded, and the
+/// Lo/Hi values are returned.
+void SelectionDAGLegalize::ExpandOp(SDOperand Op, SDOperand &Lo, SDOperand &Hi){
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType NVT = TLI.getTypeToTransformTo(VT);
+  SDNode *Node = Op.Val;
+  assert(getTypeAction(VT) == Expand && "Not an expanded type!");
+  assert(((MVT::isInteger(NVT) && NVT < VT) || MVT::isFloatingPoint(VT) ||
+         MVT::isVector(VT)) &&
+         "Cannot expand to FP value or to larger int value!");
+
+  // See if we already expanded it.
+  DenseMap<SDOperand, std::pair<SDOperand, SDOperand> >::iterator I
+    = ExpandedNodes.find(Op);
+  if (I != ExpandedNodes.end()) {
+    Lo = I->second.first;
+    Hi = I->second.second;
+    return;
+  }
+
+  switch (Node->getOpcode()) {
+  case ISD::CopyFromReg:
+    assert(0 && "CopyFromReg must be legal!");
+  default:
+#ifndef NDEBUG
+    cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to expand this operator!");
+    abort();
+  case ISD::UNDEF:
+    NVT = TLI.getTypeToExpandTo(VT);
+    Lo = DAG.getNode(ISD::UNDEF, NVT);
+    Hi = DAG.getNode(ISD::UNDEF, NVT);
+    break;
+  case ISD::Constant: {
+    uint64_t Cst = cast<ConstantSDNode>(Node)->getValue();
+    Lo = DAG.getConstant(Cst, NVT);
+    Hi = DAG.getConstant(Cst >> MVT::getSizeInBits(NVT), NVT);
+    break;
+  }
+  case ISD::ConstantFP: {
+    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+    Lo = ExpandConstantFP(CFP, false, DAG, TLI);
+    if (getTypeAction(Lo.getValueType()) == Expand)
+      ExpandOp(Lo, Lo, Hi);
+    break;
+  }
+  case ISD::BUILD_PAIR:
+    // Return the operands.
+    Lo = Node->getOperand(0);
+    Hi = Node->getOperand(1);
+    break;
+    
+  case ISD::SIGN_EXTEND_INREG:
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    // sext_inreg the low part if needed.
+    Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Lo, Node->getOperand(1));
+    
+    // The high part gets the sign extension from the lo-part.  This handles
+    // things like sextinreg V:i64 from i8.
+    Hi = DAG.getNode(ISD::SRA, NVT, Lo,
+                     DAG.getConstant(MVT::getSizeInBits(NVT)-1,
+                                     TLI.getShiftAmountTy()));
+    break;
+
+  case ISD::BSWAP: {
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    SDOperand TempLo = DAG.getNode(ISD::BSWAP, NVT, Hi);
+    Hi = DAG.getNode(ISD::BSWAP, NVT, Lo);
+    Lo = TempLo;
+    break;
+  }
+    
+  case ISD::CTPOP:
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    Lo = DAG.getNode(ISD::ADD, NVT,          // ctpop(HL) -> ctpop(H)+ctpop(L)
+                     DAG.getNode(ISD::CTPOP, NVT, Lo),
+                     DAG.getNode(ISD::CTPOP, NVT, Hi));
+    Hi = DAG.getConstant(0, NVT);
+    break;
+
+  case ISD::CTLZ: {
+    // ctlz (HL) -> ctlz(H) != 32 ? ctlz(H) : (ctlz(L)+32)
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT);
+    SDOperand HLZ = DAG.getNode(ISD::CTLZ, NVT, Hi);
+    SDOperand TopNotZero = DAG.getSetCC(TLI.getSetCCResultTy(), HLZ, BitsC,
+                                        ISD::SETNE);
+    SDOperand LowPart = DAG.getNode(ISD::CTLZ, NVT, Lo);
+    LowPart = DAG.getNode(ISD::ADD, NVT, LowPart, BitsC);
+
+    Lo = DAG.getNode(ISD::SELECT, NVT, TopNotZero, HLZ, LowPart);
+    Hi = DAG.getConstant(0, NVT);
+    break;
+  }
+
+  case ISD::CTTZ: {
+    // cttz (HL) -> cttz(L) != 32 ? cttz(L) : (cttz(H)+32)
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT);
+    SDOperand LTZ = DAG.getNode(ISD::CTTZ, NVT, Lo);
+    SDOperand BotNotZero = DAG.getSetCC(TLI.getSetCCResultTy(), LTZ, BitsC,
+                                        ISD::SETNE);
+    SDOperand HiPart = DAG.getNode(ISD::CTTZ, NVT, Hi);
+    HiPart = DAG.getNode(ISD::ADD, NVT, HiPart, BitsC);
+
+    Lo = DAG.getNode(ISD::SELECT, NVT, BotNotZero, LTZ, HiPart);
+    Hi = DAG.getConstant(0, NVT);
+    break;
+  }
+
+  case ISD::VAARG: {
+    SDOperand Ch = Node->getOperand(0);   // Legalize the chain.
+    SDOperand Ptr = Node->getOperand(1);  // Legalize the pointer.
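+    // Expand into two NVT-sized VAARG reads; the second is chained on the
+    // first so they fetch consecutive words, and the halves are swapped below
+    // on big-endian targets, where the first word read is the high part.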
+    Lo = DAG.getVAArg(NVT, Ch, Ptr, Node->getOperand(2));
+    Hi = DAG.getVAArg(NVT, Lo.getValue(1), Ptr, Node->getOperand(2));
+
+    // Remember that we legalized the chain.
+    Hi = LegalizeOp(Hi);
+    AddLegalizedOperand(Op.getValue(1), Hi.getValue(1));
+    if (!TLI.isLittleEndian())
+      std::swap(Lo, Hi);
+    break;
+  }
+    
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    SDOperand Ch  = LD->getChain();    // Legalize the chain.
+    SDOperand Ptr = LD->getBasePtr();  // Legalize the pointer.
+    ISD::LoadExtType ExtType = LD->getExtensionType();
+    int SVOffset = LD->getSrcValueOffset();
+    unsigned Alignment = LD->getAlignment();
+    bool isVolatile = LD->isVolatile();
+
+    if (ExtType == ISD::NON_EXTLOAD) {
+      Lo = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(), SVOffset,
+                       isVolatile, Alignment);
+      if (VT == MVT::f32 || VT == MVT::f64) {
+        // f32->i32 or f64->i64 one-to-one expansion.
+        // Remember that we legalized the chain.
+        AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Lo.getValue(1)));
+        // Recursively expand the new load.
+        if (getTypeAction(NVT) == Expand)
+          ExpandOp(Lo, Lo, Hi);
+        break;
+      }
+
+      // Increment the pointer to the other half.
+      unsigned IncrementSize = MVT::getSizeInBits(Lo.getValueType())/8;
+      Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr,
+                        getIntPtrConstant(IncrementSize));
+      SVOffset += IncrementSize;
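+      // The second half lies IncrementSize bytes past the first, so we can
+      // only guarantee min(Alignment, IncrementSize) alignment for it.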
+      if (Alignment > IncrementSize)
+        Alignment = IncrementSize;
+      Hi = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(), SVOffset,
+                       isVolatile, Alignment);
+
+      // Build a factor node to remember that this load is independent of the
+      // other one.
+      SDOperand TF = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo.getValue(1),
+                                 Hi.getValue(1));
+
+      // Remember that we legalized the chain.
+      AddLegalizedOperand(Op.getValue(1), LegalizeOp(TF));
+      if (!TLI.isLittleEndian())
+        std::swap(Lo, Hi);
+    } else {
+      MVT::ValueType EVT = LD->getLoadedVT();
+
+      if (VT == MVT::f64 && EVT == MVT::f32) {
+        // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+        SDOperand Load = DAG.getLoad(EVT, Ch, Ptr, LD->getSrcValue(),
+                                     SVOffset, isVolatile, Alignment);
+        // Remember that we legalized the chain.
+        AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Load.getValue(1)));
+        ExpandOp(DAG.getNode(ISD::FP_EXTEND, VT, Load), Lo, Hi);
+        break;
+      }
+    
+      if (EVT == NVT)
+        Lo = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(),
+                         SVOffset, isVolatile, Alignment);
+      else
+        Lo = DAG.getExtLoad(ExtType, NVT, Ch, Ptr, LD->getSrcValue(),
+                            SVOffset, EVT, isVolatile,
+                            Alignment);
+    
+      // Remember that we legalized the chain.
+      AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Lo.getValue(1)));
+
+      if (ExtType == ISD::SEXTLOAD) {
+        // The high part is obtained by SRA'ing all but one of the bits of the
+        // lo part.
+        unsigned LoSize = MVT::getSizeInBits(Lo.getValueType());
+        Hi = DAG.getNode(ISD::SRA, NVT, Lo,
+                         DAG.getConstant(LoSize-1, TLI.getShiftAmountTy()));
+      } else if (ExtType == ISD::ZEXTLOAD) {
+        // The high part is just a zero.
+        Hi = DAG.getConstant(0, NVT);
+      } else /* if (ExtType == ISD::EXTLOAD) */ {
+        // The high part is undefined.
+        Hi = DAG.getNode(ISD::UNDEF, NVT);
+      }
+    }
+    break;
+  }
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: {   // Simple logical operators -> two trivial pieces.
+    SDOperand LL, LH, RL, RH;
+    ExpandOp(Node->getOperand(0), LL, LH);
+    ExpandOp(Node->getOperand(1), RL, RH);
+    Lo = DAG.getNode(Node->getOpcode(), NVT, LL, RL);
+    Hi = DAG.getNode(Node->getOpcode(), NVT, LH, RH);
+    break;
+  }
+  case ISD::SELECT: {
+    SDOperand LL, LH, RL, RH;
+    ExpandOp(Node->getOperand(1), LL, LH);
+    ExpandOp(Node->getOperand(2), RL, RH);
+    if (getTypeAction(NVT) == Expand)
+      NVT = TLI.getTypeToExpandTo(NVT);
+    Lo = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), LL, RL);
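+    // An expanded f32 is a single i32, so in that case there is no Hi part.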
+    if (VT != MVT::f32)
+      Hi = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), LH, RH);
+    break;
+  }
+  case ISD::SELECT_CC: {
+    SDOperand TL, TH, FL, FH;
+    ExpandOp(Node->getOperand(2), TL, TH);
+    ExpandOp(Node->getOperand(3), FL, FH);
+    if (getTypeAction(NVT) == Expand)
+      NVT = TLI.getTypeToExpandTo(NVT);
+    Lo = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0),
+                     Node->getOperand(1), TL, FL, Node->getOperand(4));
+    if (VT != MVT::f32)
+      Hi = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0),
+                       Node->getOperand(1), TH, FH, Node->getOperand(4));
+    break;
+  }
+  case ISD::ANY_EXTEND:
+    // The low part is any extension of the input (which degenerates to a copy).
+    Lo = DAG.getNode(ISD::ANY_EXTEND, NVT, Node->getOperand(0));
+    // The high part is undefined.
+    Hi = DAG.getNode(ISD::UNDEF, NVT);
+    break;
+  case ISD::SIGN_EXTEND: {
+    // The low part is just a sign extension of the input (which degenerates to
+    // a copy).
+    Lo = DAG.getNode(ISD::SIGN_EXTEND, NVT, Node->getOperand(0));
+
+    // The high part is obtained by SRA'ing all but one of the bits of the lo
+    // part.
+    unsigned LoSize = MVT::getSizeInBits(Lo.getValueType());
+    Hi = DAG.getNode(ISD::SRA, NVT, Lo,
+                     DAG.getConstant(LoSize-1, TLI.getShiftAmountTy()));
+    break;
+  }
+  case ISD::ZERO_EXTEND:
+    // The low part is just a zero extension of the input (which degenerates to
+    // a copy).
+    Lo = DAG.getNode(ISD::ZERO_EXTEND, NVT, Node->getOperand(0));
+
+    // The high part is just a zero.
+    Hi = DAG.getConstant(0, NVT);
+    break;
+    
+  case ISD::TRUNCATE: {
+    // The input value must be larger than this value.  Expand *it*.
+    SDOperand NewLo;
+    ExpandOp(Node->getOperand(0), NewLo, Hi);
+    
+    // The low part is now either the right size, or it is closer.  If not the
+    // right size, make an illegal truncate so we recursively expand it.
+    if (NewLo.getValueType() != Node->getValueType(0))
+      NewLo = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), NewLo);
+    ExpandOp(NewLo, Lo, Hi);
+    break;
+  }
+    
+  case ISD::BIT_CONVERT: {
+    SDOperand Tmp;
+    if (TLI.getOperationAction(ISD::BIT_CONVERT, VT) == TargetLowering::Custom){
+      // If the target wants to, allow it to lower this itself.
+      switch (getTypeAction(Node->getOperand(0).getValueType())) {
+      case Expand: assert(0 && "cannot expand FP!");
+      case Legal:   Tmp = LegalizeOp(Node->getOperand(0)); break;
+      case Promote: Tmp = PromoteOp (Node->getOperand(0)); break;
+      }
+      Tmp = TLI.LowerOperation(DAG.getNode(ISD::BIT_CONVERT, VT, Tmp), DAG);
+    }
+
+    // f32 / f64 must be expanded to i32 / i64.
+    if (VT == MVT::f32 || VT == MVT::f64) {
+      Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+      if (getTypeAction(NVT) == Expand)
+        ExpandOp(Lo, Lo, Hi);
+      break;
+    }
+
+    // If the source operand will be expanded to the same type as VT, i.e.
+    // i64 <- f64 or i32 <- f32, expand the source operand instead.
+    MVT::ValueType VT0 = Node->getOperand(0).getValueType();
+    if (getTypeAction(VT0) == Expand && TLI.getTypeToTransformTo(VT0) == VT) {
+      ExpandOp(Node->getOperand(0), Lo, Hi);
+      break;
+    }
+
+    // Turn this into a load/store pair by default.
+    if (Tmp.Val == 0)
+      Tmp = ExpandBIT_CONVERT(VT, Node->getOperand(0));
+    
+    ExpandOp(Tmp, Lo, Hi);
+    break;
+  }
+
+  case ISD::READCYCLECOUNTER:
+    assert(TLI.getOperationAction(ISD::READCYCLECOUNTER, VT) == 
+                 TargetLowering::Custom &&
+           "Must custom expand ReadCycleCounter");
+    Lo = TLI.LowerOperation(Op, DAG);
+    assert(Lo.Val && "Node must be custom expanded!");
+    Hi = Lo.getValue(1);
+    AddLegalizedOperand(SDOperand(Node, 1), // Remember we legalized the chain.
+                        LegalizeOp(Lo.getValue(2)));
+    break;
+
+    // These operators cannot be expanded directly; emit them as calls to
+    // library functions.
+  case ISD::FP_TO_SINT: {
+    if (TLI.getOperationAction(ISD::FP_TO_SINT, VT) == TargetLowering::Custom) {
+      SDOperand Op;
+      switch (getTypeAction(Node->getOperand(0).getValueType())) {
+      case Expand: assert(0 && "cannot expand FP!");
+      case Legal:   Op = LegalizeOp(Node->getOperand(0)); break;
+      case Promote: Op = PromoteOp (Node->getOperand(0)); break;
+      }
+
+      Op = TLI.LowerOperation(DAG.getNode(ISD::FP_TO_SINT, VT, Op), DAG);
+
+      // Now that the custom expander is done, expand the result, which is still
+      // VT.
+      if (Op.Val) {
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+
+    RTLIB::Libcall LC;
+    if (Node->getOperand(0).getValueType() == MVT::f32)
+      LC = RTLIB::FPTOSINT_F32_I64;
+    else
+      LC = RTLIB::FPTOSINT_F64_I64;
+    Lo = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                       false/*sign irrelevant*/, Hi);
+    break;
+  }
+
+  case ISD::FP_TO_UINT: {
+    if (TLI.getOperationAction(ISD::FP_TO_UINT, VT) == TargetLowering::Custom) {
+      SDOperand Op;
+      switch (getTypeAction(Node->getOperand(0).getValueType())) {
+        case Expand: assert(0 && "cannot expand FP!");
+        case Legal:   Op = LegalizeOp(Node->getOperand(0)); break;
+        case Promote: Op = PromoteOp (Node->getOperand(0)); break;
+      }
+        
+      Op = TLI.LowerOperation(DAG.getNode(ISD::FP_TO_UINT, VT, Op), DAG);
+
+      // Now that the custom expander is done, expand the result.
+      if (Op.Val) {
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+
+    RTLIB::Libcall LC;
+    if (Node->getOperand(0).getValueType() == MVT::f32)
+      LC = RTLIB::FPTOUINT_F32_I64;
+    else
+      LC = RTLIB::FPTOUINT_F64_I64;
+    Lo = ExpandLibCall(TLI.getLibcallName(LC), Node,
+                       false/*sign irrelevant*/, Hi);
+    break;
+  }
+
+  case ISD::SHL: {
+    // If the target wants custom lowering, do so.
+    SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+    if (TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Custom) {
+      SDOperand Op = DAG.getNode(ISD::SHL, VT, Node->getOperand(0), ShiftAmt);
+      Op = TLI.LowerOperation(Op, DAG);
+      if (Op.Val) {
+        // Now that the custom expander is done, expand the result, which is
+        // still VT.
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+    
+    // If ADDC/ADDE are supported and if the shift amount is a constant 1, emit 
+    // this X << 1 as X+X.
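+    // Splitting the 2*N-bit value into N-bit halves (Hi,Lo), this uses the
+    // identity (Hi,Lo) << 1 == (Hi + Hi + carry, Lo + Lo), where carry is the
+    // carry-out of Lo + Lo: ADDC produces that carry and ADDE consumes it.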
+    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+      if (ShAmt->getValue() == 1 && TLI.isOperationLegal(ISD::ADDC, NVT) && 
+          TLI.isOperationLegal(ISD::ADDE, NVT)) {
+        SDOperand LoOps[2], HiOps[3];
+        ExpandOp(Node->getOperand(0), LoOps[0], HiOps[0]);
+        SDVTList VTList = DAG.getVTList(LoOps[0].getValueType(), MVT::Flag);
+        LoOps[1] = LoOps[0];
+        Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+
+        HiOps[1] = HiOps[0];
+        HiOps[2] = Lo.getValue(1);
+        Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+        break;
+      }
+    }
+    
+    // If we can emit an efficient shift operation, do so now.
+    if (ExpandShift(ISD::SHL, Node->getOperand(0), ShiftAmt, Lo, Hi))
+      break;
+
+    // If this target supports SHL_PARTS, use it.
+    TargetLowering::LegalizeAction Action =
+      TLI.getOperationAction(ISD::SHL_PARTS, NVT);
+    if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+        Action == TargetLowering::Custom) {
+      ExpandShiftParts(ISD::SHL_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+      break;
+    }
+
+    // Otherwise, emit a libcall.
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SHL_I64), Node,
+                       false/*left shift=unsigned*/, Hi);
+    break;
+  }
+
+  case ISD::SRA: {
+    // If the target wants custom lowering, do so.
+    SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+    if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Custom) {
+      SDOperand Op = DAG.getNode(ISD::SRA, VT, Node->getOperand(0), ShiftAmt);
+      Op = TLI.LowerOperation(Op, DAG);
+      if (Op.Val) {
+        // Now that the custom expander is done, expand the result, which is
+        // still VT.
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+    
+    // If we can emit an efficient shift operation, do so now.
+    if (ExpandShift(ISD::SRA, Node->getOperand(0), ShiftAmt, Lo, Hi))
+      break;
+
+    // If this target supports SRA_PARTS, use it.
+    TargetLowering::LegalizeAction Action =
+      TLI.getOperationAction(ISD::SRA_PARTS, NVT);
+    if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+        Action == TargetLowering::Custom) {
+      ExpandShiftParts(ISD::SRA_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+      break;
+    }
+
+    // Otherwise, emit a libcall.
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SRA_I64), Node,
+                       true/*ashr is signed*/, Hi);
+    break;
+  }
+
+  case ISD::SRL: {
+    // If the target wants custom lowering, do so.
+    SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+    if (TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Custom) {
+      SDOperand Op = DAG.getNode(ISD::SRL, VT, Node->getOperand(0), ShiftAmt);
+      Op = TLI.LowerOperation(Op, DAG);
+      if (Op.Val) {
+        // Now that the custom expander is done, expand the result, which is
+        // still VT.
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+
+    // If we can emit an efficient shift operation, do so now.
+    if (ExpandShift(ISD::SRL, Node->getOperand(0), ShiftAmt, Lo, Hi))
+      break;
+
+    // If this target supports SRL_PARTS, use it.
+    TargetLowering::LegalizeAction Action =
+      TLI.getOperationAction(ISD::SRL_PARTS, NVT);
+    if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+        Action == TargetLowering::Custom) {
+      ExpandShiftParts(ISD::SRL_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+      break;
+    }
+
+    // Otherwise, emit a libcall.
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SRL_I64), Node,
+                       false/*lshr is unsigned*/, Hi);
+    break;
+  }
+
+  case ISD::ADD:
+  case ISD::SUB: {
+    // If the target wants to custom expand this, let them.
+    if (TLI.getOperationAction(Node->getOpcode(), VT) ==
+            TargetLowering::Custom) {
+      Op = TLI.LowerOperation(Op, DAG);
+      if (Op.Val) {
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+    
+    // Expand the subcomponents.
+    SDOperand LHSL, LHSH, RHSL, RHSH;
+    ExpandOp(Node->getOperand(0), LHSL, LHSH);
+    ExpandOp(Node->getOperand(1), RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+    SDOperand LoOps[2], HiOps[3];
+    LoOps[0] = LHSL;
+    LoOps[1] = RHSL;
+    HiOps[0] = LHSH;
+    HiOps[1] = RHSH;
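+    // A wide add/sub is a carry chain: ADDC/SUBC combine the low halves and
+    // produce a carry/borrow flag, which ADDE/SUBE fold into the high halves.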
+    if (Node->getOpcode() == ISD::ADD) {
+      Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+    } else {
+      Lo = DAG.getNode(ISD::SUBC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBE, VTList, HiOps, 3);
+    }
+    break;
+  }
+    
+  case ISD::ADDC:
+  case ISD::SUBC: {
+    // Expand the subcomponents.
+    SDOperand LHSL, LHSH, RHSL, RHSH;
+    ExpandOp(Node->getOperand(0), LHSL, LHSH);
+    ExpandOp(Node->getOperand(1), RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+    SDOperand LoOps[2] = { LHSL, RHSL };
+    SDOperand HiOps[3] = { LHSH, RHSH };
+    
+    if (Node->getOpcode() == ISD::ADDC) {
+      Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+    } else {
+      Lo = DAG.getNode(ISD::SUBC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBE, VTList, HiOps, 3);
+    }
+    // Remember that we legalized the flag.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Hi.getValue(1)));
+    break;
+  }
+  case ISD::ADDE:
+  case ISD::SUBE: {
+    // Expand the subcomponents.
+    SDOperand LHSL, LHSH, RHSL, RHSH;
+    ExpandOp(Node->getOperand(0), LHSL, LHSH);
+    ExpandOp(Node->getOperand(1), RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+    SDOperand LoOps[3] = { LHSL, RHSL, Node->getOperand(2) };
+    SDOperand HiOps[3] = { LHSH, RHSH };
+    
+    Lo = DAG.getNode(Node->getOpcode(), VTList, LoOps, 3);
+    HiOps[2] = Lo.getValue(1);
+    Hi = DAG.getNode(Node->getOpcode(), VTList, HiOps, 3);
+    
+    // Remember that we legalized the flag.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Hi.getValue(1)));
+    break;
+  }
+  case ISD::MUL: {
+    // If the target wants to custom expand this, let them.
+    if (TLI.getOperationAction(ISD::MUL, VT) == TargetLowering::Custom) {
+      SDOperand New = TLI.LowerOperation(Op, DAG);
+      if (New.Val) {
+        ExpandOp(New, Lo, Hi);
+        break;
+      }
+    }
+    
+    bool HasMULHS = TLI.isOperationLegal(ISD::MULHS, NVT);
+    bool HasMULHU = TLI.isOperationLegal(ISD::MULHU, NVT);
+    if (HasMULHS || HasMULHU) {
+      SDOperand LL, LH, RL, RH;
+      ExpandOp(Node->getOperand(0), LL, LH);
+      ExpandOp(Node->getOperand(1), RL, RH);
+      unsigned SH = MVT::getSizeInBits(RH.getValueType())-1;
+      // FIXME: Move this to the dag combiner.
+      // MULHS implicitly sign extends its inputs.  Check to see if ExpandOp
+      // extended the sign bit of the low half through the upper half, and if so
+      // emit a MULHS instead of the alternate sequence that is valid for any
+      // i64 x i64 multiply.
+      if (HasMULHS &&
+          // is RH an extension of the sign bit of RL?
+          RH.getOpcode() == ISD::SRA && RH.getOperand(0) == RL &&
+          RH.getOperand(1).getOpcode() == ISD::Constant &&
+          cast<ConstantSDNode>(RH.getOperand(1))->getValue() == SH &&
+          // is LH an extension of the sign bit of LL?
+          LH.getOpcode() == ISD::SRA && LH.getOperand(0) == LL &&
+          LH.getOperand(1).getOpcode() == ISD::Constant &&
+          cast<ConstantSDNode>(LH.getOperand(1))->getValue() == SH) {
+        // Low part:
+        Lo = DAG.getNode(ISD::MUL, NVT, LL, RL);
+        // High part:
+        Hi = DAG.getNode(ISD::MULHS, NVT, LL, RL);
+        break;
+      } else if (HasMULHU) {
+        // Low part:
+        Lo = DAG.getNode(ISD::MUL, NVT, LL, RL);
+        
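+        // Writing X = XH*2^N + XL and Y = YH*2^N + YL (N = bits in NVT), the
+        // low 2*N bits of X*Y are XL*YL + ((XL*YH + XH*YL) << N); the XH*YH
+        // term only affects bits that are discarded.  MULHU supplies the high
+        // N bits of XL*YL.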
+        // High part:
+        Hi = DAG.getNode(ISD::MULHU, NVT, LL, RL);
+        RH = DAG.getNode(ISD::MUL, NVT, LL, RH);
+        LH = DAG.getNode(ISD::MUL, NVT, LH, RL);
+        Hi = DAG.getNode(ISD::ADD, NVT, Hi, RH);
+        Hi = DAG.getNode(ISD::ADD, NVT, Hi, LH);
+        break;
+      }
+    }
+
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::MUL_I64), Node,
+                       false/*sign irrelevant*/, Hi);
+    break;
+  }
+  case ISD::SDIV:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SDIV_I64), Node, true, Hi);
+    break;
+  case ISD::UDIV:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::UDIV_I64), Node, true, Hi);
+    break;
+  case ISD::SREM:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SREM_I64), Node, true, Hi);
+    break;
+  case ISD::UREM:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::UREM_I64), Node, true, Hi);
+    break;
+
+  case ISD::FADD:
+    Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+                                          ? RTLIB::ADD_F32 : RTLIB::ADD_F64),
+                       Node, false, Hi);
+    break;
+  case ISD::FSUB:
+    Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+                                          ? RTLIB::SUB_F32 : RTLIB::SUB_F64),
+                       Node, false, Hi);
+    break;
+  case ISD::FMUL:
+    Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+                                          ? RTLIB::MUL_F32 : RTLIB::MUL_F64),
+                       Node, false, Hi);
+    break;
+  case ISD::FDIV:
+    Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+                                          ? RTLIB::DIV_F32 : RTLIB::DIV_F64),
+                       Node, false, Hi);
+    break;
+  case ISD::FP_EXTEND:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::FPEXT_F32_F64), Node, true,Hi);
+    break;
+  case ISD::FP_ROUND:
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::FPROUND_F64_F32),Node,true,Hi);
+    break;
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS: {
+    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+    switch(Node->getOpcode()) {
+    case ISD::FSQRT:
+      LC = (VT == MVT::f32) ? RTLIB::SQRT_F32 : RTLIB::SQRT_F64;
+      break;
+    case ISD::FSIN:
+      LC = (VT == MVT::f32) ? RTLIB::SIN_F32 : RTLIB::SIN_F64;
+      break;
+    case ISD::FCOS:
+      LC = (VT == MVT::f32) ? RTLIB::COS_F32 : RTLIB::COS_F64;
+      break;
+    default: assert(0 && "Unreachable!");
+    }
+    Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, false, Hi);
+    break;
+  }
+  case ISD::FABS: {
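+    // fabs is implemented as an integer AND that clears the IEEE sign bit;
+    // the mask is all ones except for the topmost bit.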
+    SDOperand Mask = (VT == MVT::f64)
+      ? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), VT)
+      : DAG.getConstantFP(BitsToFloat(~(1U << 31)), VT);
+    Mask = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+    Lo = DAG.getNode(ISD::AND, NVT, Lo, Mask);
+    if (getTypeAction(NVT) == Expand)
+      ExpandOp(Lo, Lo, Hi);
+    break;
+  }
+  case ISD::FNEG: {
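+    // fneg is implemented as an integer XOR that flips the IEEE sign bit.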
+    SDOperand Mask = (VT == MVT::f64)
+      ? DAG.getConstantFP(BitsToDouble(1ULL << 63), VT)
+      : DAG.getConstantFP(BitsToFloat(1U << 31), VT);
+    Mask = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+    Lo = DAG.getNode(ISD::XOR, NVT, Lo, Mask);
+    if (getTypeAction(NVT) == Expand)
+      ExpandOp(Lo, Lo, Hi);
+    break;
+  }
+  case ISD::FCOPYSIGN: {
+    Lo = ExpandFCOPYSIGNToBitwiseOps(Node, NVT, DAG, TLI);
+    if (getTypeAction(NVT) == Expand)
+      ExpandOp(Lo, Lo, Hi);
+    break;
+  }
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP: {
+    bool isSigned = Node->getOpcode() == ISD::SINT_TO_FP;
+    MVT::ValueType SrcVT = Node->getOperand(0).getValueType();
+    RTLIB::Libcall LC;
+    if (Node->getOperand(0).getValueType() == MVT::i64) {
+      if (VT == MVT::f32)
+        LC = isSigned ? RTLIB::SINTTOFP_I64_F32 : RTLIB::UINTTOFP_I64_F32;
+      else
+        LC = isSigned ? RTLIB::SINTTOFP_I64_F64 : RTLIB::UINTTOFP_I64_F64;
+    } else {
+      if (VT == MVT::f32)
+        LC = isSigned ? RTLIB::SINTTOFP_I32_F32 : RTLIB::UINTTOFP_I32_F32;
+      else
+        LC = isSigned ? RTLIB::SINTTOFP_I32_F64 : RTLIB::UINTTOFP_I32_F64;
+    }
+
+    // Promote the operand if needed.
+    if (getTypeAction(SrcVT) == Promote) {
+      SDOperand Tmp = PromoteOp(Node->getOperand(0));
+      Tmp = isSigned
+        ? DAG.getNode(ISD::SIGN_EXTEND_INREG, Tmp.getValueType(), Tmp,
+                      DAG.getValueType(SrcVT))
+        : DAG.getZeroExtendInReg(Tmp, SrcVT);
+      Node = DAG.UpdateNodeOperands(Op, Tmp).Val;
+    }
+
+    const char *LibCall = TLI.getLibcallName(LC);
+    if (LibCall)
+      Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Hi);
+    else  {
+      Lo = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, VT,
+                         Node->getOperand(0));
+      if (getTypeAction(Lo.getValueType()) == Expand)
+        ExpandOp(Lo, Lo, Hi);
+    }
+    break;
+  }
+  }
+
+  // Make sure the resultant values have been legalized themselves, unless this
+  // is a type that requires multi-step expansion.
+  if (getTypeAction(NVT) != Expand && NVT != MVT::isVoid) {
+    Lo = LegalizeOp(Lo);
+    // Hi may be null if the value was expanded to a single node (e.g. f32,
+    // which expands to one i32).
+    if (Hi.Val)
+      Hi = LegalizeOp(Hi);
+  }
+
+  // Remember in a map if the values will be reused later.
+  bool isNew = ExpandedNodes.insert(std::make_pair(Op, std::make_pair(Lo, Hi)));
+  assert(isNew && "Value already expanded?!?");
+}
+
+/// SplitVectorOp - Given an operand of vector type, break it down into
+/// two smaller values, still of vector type.
+void SelectionDAGLegalize::SplitVectorOp(SDOperand Op, SDOperand &Lo,
+                                         SDOperand &Hi) {
+  assert(MVT::isVector(Op.getValueType()) && "Cannot split non-vector type!");
+  SDNode *Node = Op.Val;
+  unsigned NumElements = MVT::getVectorNumElements(Node->getValueType(0));
+  assert(NumElements > 1 && "Cannot split a single element vector!");
+  unsigned NewNumElts = NumElements/2;
+  MVT::ValueType NewEltVT = MVT::getVectorElementType(Node->getValueType(0));
+  MVT::ValueType NewVT = MVT::getVectorType(NewEltVT, NewNumElts);
+  
+  // See if we already split it.
+  std::map<SDOperand, std::pair<SDOperand, SDOperand> >::iterator I
+    = SplitNodes.find(Op);
+  if (I != SplitNodes.end()) {
+    Lo = I->second.first;
+    Hi = I->second.second;
+    return;
+  }
+  
+  switch (Node->getOpcode()) {
+  default: 
+#ifndef NDEBUG
+    Node->dump(&DAG);
+#endif
+    assert(0 && "Unhandled operation in SplitVectorOp!");
+  case ISD::BUILD_PAIR:
+    Lo = Node->getOperand(0);
+    Hi = Node->getOperand(1);
+    break;
+  case ISD::BUILD_VECTOR: {
+    SmallVector<SDOperand, 8> LoOps(Node->op_begin(), 
+                                    Node->op_begin()+NewNumElts);
+    Lo = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &LoOps[0], LoOps.size());
+
+    SmallVector<SDOperand, 8> HiOps(Node->op_begin()+NewNumElts, 
+                                    Node->op_end());
+    Hi = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &HiOps[0], HiOps.size());
+    break;
+  }
+  case ISD::CONCAT_VECTORS: {
+    unsigned NewNumSubvectors = Node->getNumOperands() / 2;
+    if (NewNumSubvectors == 1) {
+      Lo = Node->getOperand(0);
+      Hi = Node->getOperand(1);
+    } else {
+      SmallVector<SDOperand, 8> LoOps(Node->op_begin(), 
+                                      Node->op_begin()+NewNumSubvectors);
+      Lo = DAG.getNode(ISD::CONCAT_VECTORS, NewVT, &LoOps[0], LoOps.size());
+
+      SmallVector<SDOperand, 8> HiOps(Node->op_begin()+NewNumSubvectors, 
+                                      Node->op_end());
+      Hi = DAG.getNode(ISD::CONCAT_VECTORS, NewVT, &HiOps[0], HiOps.size());
+    }
+    break;
+  }
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::FDIV:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: {
+    SDOperand LL, LH, RL, RH;
+    SplitVectorOp(Node->getOperand(0), LL, LH);
+    SplitVectorOp(Node->getOperand(1), RL, RH);
+    
+    Lo = DAG.getNode(Node->getOpcode(), NewVT, LL, RL);
+    Hi = DAG.getNode(Node->getOpcode(), NewVT, LH, RH);
+    break;
+  }
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    SDOperand Ch = LD->getChain();
+    SDOperand Ptr = LD->getBasePtr();
+    const Value *SV = LD->getSrcValue();
+    int SVOffset = LD->getSrcValueOffset();
+    unsigned Alignment = LD->getAlignment();
+    bool isVolatile = LD->isVolatile();
+
+    Lo = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset, isVolatile, Alignment);
+    unsigned IncrementSize = NewNumElts * MVT::getSizeInBits(NewEltVT)/8;
+    Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr,
+                      getIntPtrConstant(IncrementSize));
+    SVOffset += IncrementSize;
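+    // The second half starts IncrementSize bytes past the first, so it cannot
+    // be assumed to be any more aligned than that offset.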
+    if (Alignment > IncrementSize)
+      Alignment = IncrementSize;
+    Hi = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset, isVolatile, Alignment);
+    
+    // Build a factor node to remember that this load is independent of the
+    // other one.
+    SDOperand TF = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo.getValue(1),
+                               Hi.getValue(1));
+    
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(TF));
+    break;
+  }
+  case ISD::BIT_CONVERT: {
+    // We know the result is a vector.  The input may be either a vector or a
+    // scalar value.
+    SDOperand InOp = Node->getOperand(0);
+    if (!MVT::isVector(InOp.getValueType()) ||
+        MVT::getVectorNumElements(InOp.getValueType()) == 1) {
+      // The input is a scalar or single-element vector.
+      // Lower to a store/load so that it can be split.
+      // FIXME: this could be improved probably.
+      SDOperand Ptr = CreateStackTemporary(InOp.getValueType());
+
+      SDOperand St = DAG.getStore(DAG.getEntryNode(),
+                                  InOp, Ptr, NULL, 0);
+      InOp = DAG.getLoad(Op.getValueType(), St, Ptr, NULL, 0);
+    }
+    // Split the vector and convert each of the pieces now.
+    SplitVectorOp(InOp, Lo, Hi);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, NewVT, Lo);
+    Hi = DAG.getNode(ISD::BIT_CONVERT, NewVT, Hi);
+    break;
+  }
+  }
+      
+  // Remember in a map if the values will be reused later.
+  bool isNew = 
+    SplitNodes.insert(std::make_pair(Op, std::make_pair(Lo, Hi))).second;
+  assert(isNew && "Value already split?!?");
+}
+
+
+/// ScalarizeVectorOp - Given an operand of single-element vector type
+/// (e.g. v1f32), convert it into the equivalent operation that returns a
+/// scalar (e.g. f32) value.
+SDOperand SelectionDAGLegalize::ScalarizeVectorOp(SDOperand Op) {
+  assert(MVT::isVector(Op.getValueType()) &&
+         "Bad ScalarizeVectorOp invocation!");
+  SDNode *Node = Op.Val;
+  MVT::ValueType NewVT = MVT::getVectorElementType(Op.getValueType());
+  assert(MVT::getVectorNumElements(Op.getValueType()) == 1);
+  
+  // See if we already scalarized it.
+  std::map<SDOperand, SDOperand>::iterator I = ScalarizedNodes.find(Op);
+  if (I != ScalarizedNodes.end()) return I->second;
+  
+  SDOperand Result;
+  switch (Node->getOpcode()) {
+  default: 
+#ifndef NDEBUG
+    Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Unknown vector operation in ScalarizeVectorOp!");
+  case ISD::ADD:
+  case ISD::FADD:
+  case ISD::SUB:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::FMUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::FDIV:
+  case ISD::SREM:
+  case ISD::UREM:
+  case ISD::FREM:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+    Result = DAG.getNode(Node->getOpcode(),
+                         NewVT, 
+                         ScalarizeVectorOp(Node->getOperand(0)),
+                         ScalarizeVectorOp(Node->getOperand(1)));
+    break;
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+    Result = DAG.getNode(Node->getOpcode(),
+                         NewVT, 
+                         ScalarizeVectorOp(Node->getOperand(0)));
+    break;
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    SDOperand Ch = LegalizeOp(LD->getChain());     // Legalize the chain.
+    SDOperand Ptr = LegalizeOp(LD->getBasePtr());  // Legalize the pointer.
+    
+    const Value *SV = LD->getSrcValue();
+    int SVOffset = LD->getSrcValueOffset();
+    Result = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset,
+                         LD->isVolatile(), LD->getAlignment());
+
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+    break;
+  }
+  case ISD::BUILD_VECTOR:
+    Result = Node->getOperand(0);
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    // Returning the inserted scalar element.
+    Result = Node->getOperand(1);
+    break;
+  case ISD::CONCAT_VECTORS:
+    assert(Node->getOperand(0).getValueType() == NewVT &&
+           "Concat of non-legal vectors not yet supported!");
+    Result = Node->getOperand(0);
+    break;
+  case ISD::VECTOR_SHUFFLE: {
+    // Figure out if the scalar is the LHS or RHS and return it.
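+    // With one element per vector, the shuffle mask holds a single index:
+    // zero selects the LHS element and nonzero selects the RHS element.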
+    SDOperand EltNum = Node->getOperand(2).getOperand(0);
+    if (cast<ConstantSDNode>(EltNum)->getValue())
+      Result = ScalarizeVectorOp(Node->getOperand(1));
+    else
+      Result = ScalarizeVectorOp(Node->getOperand(0));
+    break;
+  }
+  case ISD::EXTRACT_SUBVECTOR:
+    Result = Node->getOperand(0);
+    assert(Result.getValueType() == NewVT);
+    break;
+  case ISD::BIT_CONVERT:
+    Result = DAG.getNode(ISD::BIT_CONVERT, NewVT, Op.getOperand(0));
+    break;
+  case ISD::SELECT:
+    Result = DAG.getNode(ISD::SELECT, NewVT, Op.getOperand(0),
+                         ScalarizeVectorOp(Op.getOperand(1)),
+                         ScalarizeVectorOp(Op.getOperand(2)));
+    break;
+  }
+
+  if (TLI.isTypeLegal(NewVT))
+    Result = LegalizeOp(Result);
+  bool isNew = ScalarizedNodes.insert(std::make_pair(Op, Result)).second;
+  assert(isNew && "Value already scalarized?");
+  return Result;
+}
+
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize() {
+  if (ViewLegalizeDAGs) viewGraph();
+
+  // Create the legalizer and let it rewrite the DAG; this is the main entry
+  // point to the SelectionDAGLegalize class.
+  SelectionDAGLegalize(*this).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 0000000..6c50288
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+# 
+#                     The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+# 
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
new file mode 100644
index 0000000..06b2329
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
@@ -0,0 +1,725 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple two pass scheduler.  The first pass attempts to push
+// backward any lengthy instructions and critical paths.  The second pass packs
+// instructions into semi-optimal time slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// BuildSchedUnits - Build SUnits from the input selection dag.
+/// This SUnit graph is similar to the SelectionDAG, but represents flagged
+/// together nodes with a single SUnit.
+void ScheduleDAG::BuildSchedUnits() {
+  // Reserve entries in the vector for each of the SUnits we are creating.
+  // This ensures that reallocation of the vector won't happen, so SUnit*'s
+  // won't get invalidated.
+  SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end()));
+  
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  
+  for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); NI != E; ++NI) {
+    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
+      continue;
+    
+    // If this node has already been processed, stop now.
+    if (SUnitMap[NI]) continue;
+    
+    SUnit *NodeSUnit = NewSUnit(NI);
+    
+    // See if anything is flagged to this node; if so, add it to the flagged
+    // nodes.  Nodes can have at most one flag input and one flag output.
+    // Flags are required to be the last operand and result of a node.
+    
+    // Scan up, adding flagged preds to FlaggedNodes.
+    SDNode *N = NI;
+    if (N->getNumOperands() &&
+        N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+      do {
+        N = N->getOperand(N->getNumOperands()-1).Val;
+        NodeSUnit->FlaggedNodes.push_back(N);
+        SUnitMap[N] = NodeSUnit;
+      } while (N->getNumOperands() &&
+               N->getOperand(N->getNumOperands()-1).getValueType()== MVT::Flag);
+      std::reverse(NodeSUnit->FlaggedNodes.begin(),
+                   NodeSUnit->FlaggedNodes.end());
+    }
+    
+    // Scan down, adding this node and any flagged succs to FlaggedNodes if they
+    // have a user of the flag operand.
+    N = NI;
+    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+      SDOperand FlagVal(N, N->getNumValues()-1);
+      
+      // There are either zero or one users of the Flag result.
+      bool HasFlagUse = false;
+      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 
+           UI != E; ++UI)
+        if (FlagVal.isOperand(*UI)) {
+          HasFlagUse = true;
+          NodeSUnit->FlaggedNodes.push_back(N);
+          SUnitMap[N] = NodeSUnit;
+          N = *UI;
+          break;
+        }
+      if (!HasFlagUse) break;
+    }
+    
+    // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node.
+    // Update the SUnit.
+    NodeSUnit->Node = N;
+    SUnitMap[N] = NodeSUnit;
+    
+    // Compute the latency for the node.  We use the sum of the latencies for
+    // all nodes flagged together into this SUnit.
+    if (InstrItins.isEmpty()) {
+      // No latency information.
+      NodeSUnit->Latency = 1;
+    } else {
+      NodeSUnit->Latency = 0;
+      if (N->isTargetOpcode()) {
+        unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode());
+        InstrStage *S = InstrItins.begin(SchedClass);
+        InstrStage *E = InstrItins.end(SchedClass);
+        for (; S != E; ++S)
+          NodeSUnit->Latency += S->Cycles;
+      }
+      for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) {
+        SDNode *FNode = NodeSUnit->FlaggedNodes[i];
+        if (FNode->isTargetOpcode()) {
+          unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode());
+          InstrStage *S = InstrItins.begin(SchedClass);
+          InstrStage *E = InstrItins.end(SchedClass);
+          for (; S != E; ++S)
+            NodeSUnit->Latency += S->Cycles;
+        }
+      }
+    }
+  }
+  
+  // Pass 2: add the preds, succs, etc.
+  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+    SUnit *SU = &SUnits[su];
+    SDNode *MainNode = SU->Node;
+    
+    if (MainNode->isTargetOpcode()) {
+      unsigned Opc = MainNode->getTargetOpcode();
+      for (unsigned i = 0, ee = TII->getNumOperands(Opc); i != ee; ++i) {
+        if (TII->getOperandConstraint(Opc, i, TOI::TIED_TO) != -1) {
+          SU->isTwoAddress = true;
+          break;
+        }
+      }
+      if (TII->isCommutableInstr(Opc))
+        SU->isCommutable = true;
+    }
+    
+    // Find all predecessors and successors of the group.
+    // Temporarily add N to make code simpler.
+    SU->FlaggedNodes.push_back(MainNode);
+    
+    for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) {
+      SDNode *N = SU->FlaggedNodes[n];
+      
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+        SDNode *OpN = N->getOperand(i).Val;
+        if (isPassiveNode(OpN)) continue;   // Not scheduled.
+        SUnit *OpSU = SUnitMap[OpN];
+        assert(OpSU && "Node has no SUnit!");
+        if (OpSU == SU) continue;           // In the same group.
+
+        MVT::ValueType OpVT = N->getOperand(i).getValueType();
+        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+        bool isChain = OpVT == MVT::Other;
+        
+        if (SU->addPred(OpSU, isChain)) {
+          if (!isChain) {
+            SU->NumPreds++;
+            SU->NumPredsLeft++;
+          } else {
+            SU->NumChainPredsLeft++;
+          }
+        }
+        if (OpSU->addSucc(SU, isChain)) {
+          if (!isChain) {
+            OpSU->NumSuccs++;
+            OpSU->NumSuccsLeft++;
+          } else {
+            OpSU->NumChainSuccsLeft++;
+          }
+        }
+      }
+    }
+    
+    // Remove MainNode from FlaggedNodes again.
+    SU->FlaggedNodes.pop_back();
+  }
+  
+  return;
+}
+
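+/// CalculateDepths - Compute the depth of each SUnit: the length of the
+/// longest path from a unit with no predecessors, computed by relaxing
+/// depths over a worklist.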
+void ScheduleDAG::CalculateDepths() {
+  std::vector<std::pair<SUnit*, unsigned> > WorkList;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i)
+    if (SUnits[i].Preds.size() == 0/* && &SUnits[i] != Entry*/)
+      WorkList.push_back(std::make_pair(&SUnits[i], 0U));
+
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back().first;
+    unsigned Depth = WorkList.back().second;
+    WorkList.pop_back();
+    if (SU->Depth == 0 || Depth > SU->Depth) {
+      SU->Depth = Depth;
+      for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+           I != E; ++I)
+        WorkList.push_back(std::make_pair(I->first, Depth+1));
+    }
+  }
+}
+
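+/// CalculateHeights - Compute the height of each SUnit: the length of the
+/// longest path from the unit down to the root of the DAG, computed by
+/// relaxing heights over a worklist seeded with the root.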
+void ScheduleDAG::CalculateHeights() {
+  std::vector<std::pair<SUnit*, unsigned> > WorkList;
+  SUnit *Root = SUnitMap[DAG.getRoot().Val];
+  WorkList.push_back(std::make_pair(Root, 0U));
+
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back().first;
+    unsigned Height = WorkList.back().second;
+    WorkList.pop_back();
+    if (SU->Height == 0 || Height > SU->Height) {
+      SU->Height = Height;
+      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+           I != E; ++I)
+        WorkList.push_back(std::make_pair(I->first, Height+1));
+    }
+  }
+}
+
+/// CountResults - The results of target nodes have register or immediate
+/// values first, then an optional chain, and optional flag results (which
+/// do not go into the machine instrs).
+unsigned ScheduleDAG::CountResults(SDNode *Node) {
+  unsigned N = Node->getNumValues();
+  while (N && Node->getValueType(N - 1) == MVT::Flag)
+    --N;
+  if (N && Node->getValueType(N - 1) == MVT::Other)
+    --N;    // Skip over chain result.
+  return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then flag operands.  Compute the
+/// number of actual operands that will go into the machine instr.
+unsigned ScheduleDAG::CountOperands(SDNode *Node) {
+  unsigned N = Node->getNumOperands();
+  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+    --N;
+  if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+    --N; // Ignore chain if it exists.
+  return N;
+}
+
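+/// getInstrOperandRegClass - Return the register class that operand Op of
+/// instruction II is required to have, using the target's pointer register
+/// class when the operand is marked M_LOOK_UP_PTR_REG_CLASS.  Returns null
+/// for the variable operands of variadic instructions.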
+static const TargetRegisterClass *getInstrOperandRegClass(
+        const MRegisterInfo *MRI, 
+        const TargetInstrInfo *TII,
+        const TargetInstrDescriptor *II,
+        unsigned Op) {
+  if (Op >= II->numOperands) {
+    assert((II->Flags & M_VARIABLE_OPS) && "Invalid operand # of instruction");
+    return NULL;
+  }
+  const TargetOperandInfo &toi = II->OpInfo[Op];
+  return (toi.Flags & M_LOOK_UP_PTR_REG_CLASS)
+         ? TII->getPointerRegClass() : MRI->getRegClass(toi.RegClass);
+}
+
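+/// CreateVirtualRegisters - Create a virtual register for each result value
+/// of Node and add it to MI.  When a result is used only by a CopyToReg into
+/// an existing virtual register, that register is reused instead so the copy
+/// can later be coalesced away.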
+static void CreateVirtualRegisters(SDNode *Node,
+                                   unsigned NumResults, 
+                                   const MRegisterInfo *MRI,
+                                   MachineInstr *MI,
+                                   SSARegMap *RegMap,
+                                   const TargetInstrInfo *TII,
+                                   const TargetInstrDescriptor &II,
+                                   DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  for (unsigned i = 0; i < NumResults; ++i) {
+    // If the specific node value is only used by a CopyToReg and the dest reg
+    // is a vreg, use the CopyToReg'd destination register instead of creating
+    // a new vreg.
+    unsigned VRBase = 0;
+    for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+         UI != E; ++UI) {
+      SDNode *Use = *UI;
+      if (Use->getOpcode() == ISD::CopyToReg && 
+          Use->getOperand(2).Val == Node &&
+          Use->getOperand(2).ResNo == i) {
+        unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+        if (MRegisterInfo::isVirtualRegister(Reg)) {
+          VRBase = Reg;
+          MI->addRegOperand(Reg, true);
+          break;
+        }
+      }
+    }
+
+    if (VRBase == 0) {
+      // Create the result registers for this node and add the result regs to
+      // the machine instruction.
+      const TargetRegisterClass *RC = getInstrOperandRegClass(MRI, TII, &II, i);
+      assert(RC && "Isn't a register operand!");
+      VRBase = RegMap->createVirtualRegister(RC);
+      MI->addRegOperand(VRBase, true);
+    }
+
+    bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,i), VRBase));
+    assert(isNew && "Node emitted out of order - early");
+  }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+static unsigned getVR(SDOperand Op, DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  DenseMap<SDOperand, unsigned>::iterator I = VRBaseMap.find(Op);
+  assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+  return I->second;
+}
+
+
+/// AddOperand - Add the specified operand to the specified machine instr.  II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for 
+/// assertions only.
+void ScheduleDAG::AddOperand(MachineInstr *MI, SDOperand Op,
+                             unsigned IIOpNum,
+                             const TargetInstrDescriptor *II,
+                             DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  if (Op.isTargetOpcode()) {
+    // Note that this case is redundant with the final else block, but we
+    // include it because it is the most common and it makes the logic
+    // simpler here.
+    assert(Op.getValueType() != MVT::Other &&
+           Op.getValueType() != MVT::Flag &&
+           "Chain and flag operands should occur at end of operand list!");
+    
+    // Get/emit the operand.
+    unsigned VReg = getVR(Op, VRBaseMap);
+    const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+    bool isOptDef = (IIOpNum < TID->numOperands)
+      ? (TID->OpInfo[IIOpNum].Flags & M_OPTIONAL_DEF_OPERAND) : false;
+    MI->addRegOperand(VReg, isOptDef);
+    
+    // Verify that it is right.
+    assert(MRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+    if (II) {
+      const TargetRegisterClass *RC =
+                          getInstrOperandRegClass(MRI, TII, II, IIOpNum);
+      assert(RC && "Don't have operand info for this instruction!");
+      const TargetRegisterClass *VRC = RegMap->getRegClass(VReg);
+      if (VRC != RC) {
+        cerr << "Register class of operand and regclass of use don't agree!\n";
+#ifndef NDEBUG
+        cerr << "Operand = " << IIOpNum << "\n";
+        cerr << "Op->Val = "; Op.Val->dump(&DAG); cerr << "\n";
+        cerr << "MI = "; MI->print(cerr);
+        cerr << "VReg = " << VReg << "\n";
+        cerr << "VReg RegClass     size = " << VRC->getSize()
+             << ", align = " << VRC->getAlignment() << "\n";
+        cerr << "Expected RegClass size = " << RC->getSize()
+             << ", align = " << RC->getAlignment() << "\n";
+#endif
+        cerr << "Fatal error, aborting.\n";
+        abort();
+      }
+    }
+  } else if (ConstantSDNode *C =
+             dyn_cast<ConstantSDNode>(Op)) {
+    MI->addImmOperand(C->getValue());
+  } else if (RegisterSDNode *R =
+             dyn_cast<RegisterSDNode>(Op)) {
+    MI->addRegOperand(R->getReg(), false);
+  } else if (GlobalAddressSDNode *TGA =
+             dyn_cast<GlobalAddressSDNode>(Op)) {
+    MI->addGlobalAddressOperand(TGA->getGlobal(), TGA->getOffset());
+  } else if (BasicBlockSDNode *BB =
+             dyn_cast<BasicBlockSDNode>(Op)) {
+    MI->addMachineBasicBlockOperand(BB->getBasicBlock());
+  } else if (FrameIndexSDNode *FI =
+             dyn_cast<FrameIndexSDNode>(Op)) {
+    MI->addFrameIndexOperand(FI->getIndex());
+  } else if (JumpTableSDNode *JT =
+             dyn_cast<JumpTableSDNode>(Op)) {
+    MI->addJumpTableIndexOperand(JT->getIndex());
+  } else if (ConstantPoolSDNode *CP = 
+             dyn_cast<ConstantPoolSDNode>(Op)) {
+    int Offset = CP->getOffset();
+    unsigned Align = CP->getAlignment();
+    const Type *Type = CP->getType();
+    // MachineConstantPool wants an explicit alignment.
+    if (Align == 0) {
+      Align = TM.getTargetData()->getPreferredTypeAlignmentShift(Type);
+      if (Align == 0) {
+        // Alignment of vector types.  FIXME!
+        Align = TM.getTargetData()->getTypeSize(Type);
+        Align = Log2_64(Align);
+      }
+    }
+    
+    unsigned Idx;
+    if (CP->isMachineConstantPoolEntry())
+      Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+    else
+      Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align);
+    MI->addConstantPoolIndexOperand(Idx, Offset);
+  } else if (ExternalSymbolSDNode *ES = 
+             dyn_cast<ExternalSymbolSDNode>(Op)) {
+    MI->addExternalSymbolOperand(ES->getSymbol());
+  } else {
+    assert(Op.getValueType() != MVT::Other &&
+           Op.getValueType() != MVT::Flag &&
+           "Chain and flag operands should occur at end of operand list!");
+    unsigned VReg = getVR(Op, VRBaseMap);
+    MI->addRegOperand(VReg, false);
+    
+    // Verify that it is right.
+    assert(MRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+    if (II) {
+      const TargetRegisterClass *RC =
+                            getInstrOperandRegClass(MRI, TII, II, IIOpNum);
+      assert(RC && "Don't have operand info for this instruction!");
+      assert(RegMap->getRegClass(VReg) == RC &&
+             "Register class of operand and regclass of use don't agree!");
+    }
+  }
+  
+}
+
+/// getPhysicalRegisterRegClass - Return a register class that has the given
+/// value type and contains the given physical register.
+static const TargetRegisterClass *getPhysicalRegisterRegClass(
+        const MRegisterInfo *MRI,
+        MVT::ValueType VT,
+        unsigned reg) {
+  assert(MRegisterInfo::isPhysicalRegister(reg) &&
+         "reg must be a physical register");
+  // Pick the register class of the right type that contains this physreg.
+  for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
+         E = MRI->regclass_end(); I != E; ++I)
+    if ((*I)->hasType(VT) && (*I)->contains(reg))
+      return *I;
+  assert(false && "Couldn't find the register class");
+  return 0;
+}
+
+/// EmitNode - Generate machine code for a node and its needed dependencies.
+///
+void ScheduleDAG::EmitNode(SDNode *Node, 
+                           DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  // If this is a target instruction, emit it as a MachineInstr.
+  if (Node->isTargetOpcode()) {
+    unsigned Opc = Node->getTargetOpcode();
+    const TargetInstrDescriptor &II = TII->get(Opc);
+
+    unsigned NumResults = CountResults(Node);
+    unsigned NodeOperands = CountOperands(Node);
+    unsigned NumMIOperands = NodeOperands + NumResults;
+#ifndef NDEBUG
+    assert((unsigned(II.numOperands) == NumMIOperands ||
+            (II.Flags & M_VARIABLE_OPS)) &&
+           "#operands for dag node doesn't match .td file!"); 
+#endif
+
+    // Create the new machine instruction.
+    MachineInstr *MI = new MachineInstr(II);
+    
+    // Add result register values for things that are defined by this
+    // instruction.
+    if (NumResults)
+      CreateVirtualRegisters(Node, NumResults, MRI, MI, RegMap,
+                             TII, II, VRBaseMap);
+    
+    // Emit all of the actual operands of this instruction, adding them to the
+    // instruction as appropriate.
+    for (unsigned i = 0; i != NodeOperands; ++i)
+      AddOperand(MI, Node->getOperand(i), i+NumResults, &II, VRBaseMap);
+
+    // Commute node if it has been determined to be profitable.
+    if (CommuteSet.count(Node)) {
+      MachineInstr *NewMI = TII->commuteInstruction(MI);
+      if (NewMI == 0)
+        DOUT << "Sched: COMMUTING FAILED!\n";
+      else {
+        DOUT << "Sched: COMMUTED TO: " << *NewMI;
+        if (MI != NewMI) {
+          delete MI;
+          MI = NewMI;
+        }
+      }
+    }
+
+    // Now that we have emitted all operands, emit this instruction itself.
+    if ((II.Flags & M_USES_CUSTOM_DAG_SCHED_INSERTION) == 0) {
+      BB->insert(BB->end(), MI);
+    } else {
+      // Insert this instruction into the end of the basic block, potentially
+      // taking some custom action.
+      BB = DAG.getTargetLoweringInfo().InsertAtEndOfBasicBlock(MI, BB);
+    }
+  } else {
+    switch (Node->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      Node->dump(&DAG);
+#endif
+      assert(0 && "This target-independent node should have been selected!");
+    case ISD::EntryToken: // fall thru
+    case ISD::TokenFactor:
+    case ISD::LABEL:
+      break;
+    case ISD::CopyToReg: {
+      unsigned InReg;
+      if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(2)))
+        InReg = R->getReg();
+      else
+        InReg = getVR(Node->getOperand(2), VRBaseMap);
+      unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (InReg != DestReg)  {// Coalesced away the copy?
+        const TargetRegisterClass *TRC = 0;
+        // Get the target register class
+        if (MRegisterInfo::isVirtualRegister(InReg))
+          TRC = RegMap->getRegClass(InReg);
+        else
+          TRC = getPhysicalRegisterRegClass(MRI,
+                                            Node->getOperand(2).getValueType(),
+                                            InReg);
+        MRI->copyRegToReg(*BB, BB->end(), DestReg, InReg, TRC);
+      }
+      break;
+    }
+    case ISD::CopyFromReg: {
+      unsigned VRBase = 0;
+      unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (MRegisterInfo::isVirtualRegister(SrcReg)) {
+        // Just use the input register directly!
+        bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,0),SrcReg));
+        assert(isNew && "Node emitted out of order - early");
+        break;
+      }
+
+      // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+      // the CopyToReg'd destination register instead of creating a new vreg.
+      for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+           UI != E; ++UI) {
+        SDNode *Use = *UI;
+        if (Use->getOpcode() == ISD::CopyToReg && 
+            Use->getOperand(2).Val == Node) {
+          unsigned DestReg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+          if (MRegisterInfo::isVirtualRegister(DestReg)) {
+            VRBase = DestReg;
+            break;
+          }
+        }
+      }
+
+      // Figure out the register class to create for the destreg.
+      const TargetRegisterClass *TRC = 0;
+      if (VRBase) {
+        TRC = RegMap->getRegClass(VRBase);
+      } else {
+        TRC = getPhysicalRegisterRegClass(MRI, Node->getValueType(0), SrcReg);
+
+        // Create the reg, emit the copy.
+        VRBase = RegMap->createVirtualRegister(TRC);
+      }
+      MRI->copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC);
+
+      bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,0), VRBase));
+      assert(isNew && "Node emitted out of order - early");
+      break;
+    }
+    case ISD::INLINEASM: {
+      unsigned NumOps = Node->getNumOperands();
+      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+        --NumOps;  // Ignore the flag operand.
+      
+      // Create the inline asm machine instruction.
+      MachineInstr *MI =
+        new MachineInstr(BB, TII->get(TargetInstrInfo::INLINEASM));
+
+      // Add the asm string as an external symbol operand.
+      const char *AsmStr =
+        cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+      MI->addExternalSymbolOperand(AsmStr);
+      
+      // Add all of the operand registers to the instruction.
+      for (unsigned i = 2; i != NumOps;) {
+        unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getValue();
+        unsigned NumVals = Flags >> 3;
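+        // Each operand group is introduced by a flag word: the low 3 bits
+        // encode the operand kind (use, def, immediate, or addressing mode)
+        // and the remaining bits give the number of values that follow.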
+        
+        MI->addImmOperand(Flags);
+        ++i;  // Skip the ID value.
+        
+        switch (Flags & 7) {
+        default: assert(0 && "Bad flags!");
+        case 1:  // Use of register.
+          for (; NumVals; --NumVals, ++i) {
+            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+            MI->addRegOperand(Reg, false);
+          }
+          break;
+        case 2:   // Def of register.
+          for (; NumVals; --NumVals, ++i) {
+            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+            MI->addRegOperand(Reg, true);
+          }
+          break;
+        case 3: { // Immediate.
+          assert(NumVals == 1 && "Unknown immediate value!");
+          if (ConstantSDNode *CS=dyn_cast<ConstantSDNode>(Node->getOperand(i))){
+            MI->addImmOperand(CS->getValue());
+          } else {
+            GlobalAddressSDNode *GA = 
+              cast<GlobalAddressSDNode>(Node->getOperand(i));
+            MI->addGlobalAddressOperand(GA->getGlobal(), GA->getOffset());
+          }
+          ++i;
+          break;
+        }
+        case 4:  // Addressing mode.
+          // The addressing mode has been selected, just add all of the
+          // operands to the machine instruction.
+          for (; NumVals; --NumVals, ++i)
+            AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+          break;
+        }
+      }
+      break;
+    }
+    }
+  }
+}
+
+void ScheduleDAG::EmitNoop() {
+  TII->insertNoop(*BB, BB->end());
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+void ScheduleDAG::EmitSchedule() {
+  // If this is the first basic block in the function, and if it has live ins
+  // that need to be copied into vregs, emit the copies into the top of the
+  // block before emitting the code for the block.
+  MachineFunction &MF = DAG.getMachineFunction();
+  if (&MF.front() == BB && MF.livein_begin() != MF.livein_end()) {
+    for (MachineFunction::livein_iterator LI = MF.livein_begin(),
+         E = MF.livein_end(); LI != E; ++LI)
+      if (LI->second)
+        MRI->copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
+                          LI->first, RegMap->getRegClass(LI->second));
+  }
+  
+  
+  // Finally, emit the code for all of the scheduled instructions.
+  DenseMap<SDOperand, unsigned> VRBaseMap;
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i]) {
+      for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++)
+        EmitNode(SU->FlaggedNodes[j], VRBaseMap);
+      EmitNode(SU->Node, VRBaseMap);
+    } else {
+      // Null SUnit* is a noop.
+      EmitNoop();
+    }
+  }
+}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      SU->dump(&DAG);
+    else
+      cerr << "**** NOOP ****\n";
+  }
+}
+
+
+/// Run - perform scheduling.
+///
+MachineBasicBlock *ScheduleDAG::Run() {
+  TII = TM.getInstrInfo();
+  MRI = TM.getRegisterInfo();
+  RegMap = BB->getParent()->getSSARegMap();
+  ConstPool = BB->getParent()->getConstantPool();
+
+  Schedule();
+  return BB;
+}
+
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const SelectionDAG *G) const {
+  cerr << "SU(" << NodeNum << "): ";
+  Node->dump(G);
+  cerr << "\n";
+  if (FlaggedNodes.size() != 0) {
+    for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) {
+      cerr << "    ";
+      FlaggedNodes[i]->dump(G);
+      cerr << "\n";
+    }
+  }
+}
+
+void SUnit::dumpAll(const SelectionDAG *G) const {
+  dump(G);
+
+  cerr << "  # preds left       : " << NumPredsLeft << "\n";
+  cerr << "  # succs left       : " << NumSuccsLeft << "\n";
+  cerr << "  # chain preds left : " << NumChainPredsLeft << "\n";
+  cerr << "  # chain succs left : " << NumChainSuccsLeft << "\n";
+  cerr << "  Latency            : " << Latency << "\n";
+  cerr << "  Depth              : " << Depth << "\n";
+  cerr << "  Height             : " << Height << "\n";
+
+  if (Preds.size() != 0) {
+    cerr << "  Predecessors:\n";
+    for (SUnit::const_pred_iterator I = Preds.begin(), E = Preds.end();
+         I != E; ++I) {
+      if (I->second)
+        cerr << "   ch  #";
+      else
+        cerr << "   val #";
+      cerr << I->first << " - SU(" << I->first->NodeNum << ")\n";
+    }
+  }
+  if (Succs.size() != 0) {
+    cerr << "  Successors:\n";
+    for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+         I != E; ++I) {
+      if (I->second)
+        cerr << "   ch  #";
+      else
+        cerr << "   val #";
+      cerr << I->first << " - SU(" << I->first->NodeNum << ")\n";
+    }
+  }
+  cerr << "\n";
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 0000000..9e4e46f
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,531 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+#include <queue>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+  tdListDAGScheduler("list-td", "  Top-down list scheduler",
+                     createTDListDAGScheduler);
+   
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation.  This supports
+/// top-down scheduling.
+///
+class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAG {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  SchedulingPriorityQueue *AvailableQueue;
+  
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation).  Once the operands become available, the instruction is
+  /// added to the AvailableQueue.  This keeps track of each SUnit and the
+  /// number of cycles left to execute before the operation is available.
+  std::vector<std::pair<unsigned, SUnit*> > PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  HazardRecognizer *HazardRec;
+
+public:
+  ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb,
+                  const TargetMachine &tm,
+                  SchedulingPriorityQueue *availqueue,
+                  HazardRecognizer *HR)
+    : ScheduleDAG(dag, bb, tm),
+      AvailableQueue(availqueue), HazardRec(HR) {
+    }
+
+  ~ScheduleDAGList() {
+    delete HazardRec;
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+private:
+  void ReleaseSucc(SUnit *SuccSU, bool isChain);
+  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void ListScheduleTopDown();
+};
+}  // end anonymous namespace
+
+HazardRecognizer::~HazardRecognizer() {}
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+  DOUT << "********** List Scheduling **********\n";
+  
+  // Build scheduling units.
+  BuildSchedUnits();
+
+  AvailableQueue->initNodes(SUnitMap, SUnits);
+  
+  ListScheduleTopDown();
+  
+  AvailableQueue->releaseState();
+  
+  DOUT << "*** Final schedule ***\n";
+  DEBUG(dumpSchedule());
+  DOUT << "\n";
+  
+  // Emit in scheduled order
+  EmitSchedule();
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero.
+void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
+  if (!isChain)
+    SuccSU->NumPredsLeft--;
+  else
+    SuccSU->NumChainPredsLeft--;
+  
+  assert(SuccSU->NumPredsLeft >= 0 && SuccSU->NumChainPredsLeft >= 0 &&
+         "List scheduling internal error");
+  
+  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
+    // Compute how many cycles it will be before this actually becomes
+    // available.  This is the max of the start time of all predecessors plus
+    // their latencies.
+    unsigned AvailableCycle = 0;
+    for (SUnit::pred_iterator I = SuccSU->Preds.begin(),
+         E = SuccSU->Preds.end(); I != E; ++I) {
+      // If this is a token edge, we don't need to wait for the latency of the
+      // preceding instruction (e.g. a long-latency load) unless there is also
+      // some other data dependence.
+      SUnit &Pred = *I->first;
+      unsigned PredDoneCycle = Pred.Cycle;
+      if (!I->second)
+        PredDoneCycle += Pred.Latency;
+      else if (Pred.Latency)
+        PredDoneCycle += 1;
+
+      AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
+    }
+    
+    PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU));
+  }
+}
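+
+// A worked example of the AvailableCycle computation above (hypothetical
+// latencies, not from any real itinerary): suppose SuccSU has a data
+// predecessor A with Cycle = 2, Latency = 3, and a chain predecessor B with
+// Cycle = 4, Latency = 1.  A contributes 2 + 3 = 5; B, a token edge into a
+// latency-bearing node, contributes 4 + 1 = 5 via the "+= 1" case.  SuccSU
+// is therefore pushed onto PendingQueue keyed to cycle max(5, 5) = 5.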
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(&DAG));
+  
+  Sequence.push_back(SU);
+  SU->Cycle = CurCycle;
+  
+  // Top down: release successors.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    ReleaseSucc(I->first, I->second);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+
+  // All leaves to Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
+      AvailableQueue->push(&SUnits[i]);
+      SUnits[i].isAvailable = SUnits[i].isPending = true;
+    }
+  }
+  
+  // Emit the entry node first.
+  ScheduleNodeTopDown(Entry, CurCycle);
+  HazardRec->EmitInstruction(Entry->Node);
+  
+  // While the Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready, put it back.  Schedule the node.
+  std::vector<SUnit*> NotReady;
+  while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+    // Check to see if any of the pending instructions are ready to issue.  If
+    // so, add them to the available queue.
+    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+      if (PendingQueue[i].first == CurCycle) {
+        AvailableQueue->push(PendingQueue[i].second);
+        PendingQueue[i].second->isAvailable = true;
+        PendingQueue[i] = PendingQueue.back();
+        PendingQueue.pop_back();
+        --i; --e;
+      } else {
+        assert(PendingQueue[i].first > CurCycle && "Negative latency?");
+      }
+    }
+    
+    // If there are no instructions available, don't try to issue anything, and
+    // don't advance the hazard recognizer.
+    if (AvailableQueue->empty()) {
+      ++CurCycle;
+      continue;
+    }
+
+    SUnit *FoundSUnit = 0;
+    SDNode *FoundNode = 0;
+    
+    bool HasNoopHazards = false;
+    while (!AvailableQueue->empty()) {
+      SUnit *CurSUnit = AvailableQueue->pop();
+      
+      // Get the node represented by this SUnit.
+      FoundNode = CurSUnit->Node;
+      
+      // If this is a pseudo op, like copyfromreg, look to see if there is a
+      // real target node flagged to it.  If so, use the target node.
+      for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size(); 
+           FoundNode->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i)
+        FoundNode = CurSUnit->FlaggedNodes[i];
+      
+      HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode);
+      if (HT == HazardRecognizer::NoHazard) {
+        FoundSUnit = CurSUnit;
+        break;
+      }
+      
+      // Remember if this is a noop hazard.
+      HasNoopHazards |= HT == HazardRecognizer::NoopHazard;
+      
+      NotReady.push_back(CurSUnit);
+    }
+    
+    // Add the nodes that aren't ready back onto the available list.
+    if (!NotReady.empty()) {
+      AvailableQueue->push_all(NotReady);
+      NotReady.clear();
+    }
+
+    // If we found a node to schedule, do it now.
+    if (FoundSUnit) {
+      ScheduleNodeTopDown(FoundSUnit, CurCycle);
+      HazardRec->EmitInstruction(FoundNode);
+      FoundSUnit->isScheduled = true;
+      AvailableQueue->ScheduledNode(FoundSUnit);
+
+      // If this is a pseudo-op node, we don't want to increment the current
+      // cycle.
+      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
+        ++CurCycle;        
+    } else if (!HasNoopHazards) {
+      // Otherwise, we have a pipeline stall but no other problem; just advance
+      // the current cycle and try again.
+      DOUT << "*** Advancing cycle, no work to do\n";
+      HazardRec->AdvanceCycle();
+      ++NumStalls;
+      ++CurCycle;
+    } else {
+      // Otherwise, we have no instructions to issue and we have instructions
+      // that will fault if we don't do this right.  This is the case for
+      // processors without pipeline interlocks and other cases.
+      DOUT << "*** Emitting noop\n";
+      HazardRec->EmitNoop();
+      Sequence.push_back(0);   // NULL SUnit* -> noop
+      ++NumNoops;
+      ++CurCycle;
+    }
+  }
+
+#ifndef NDEBUG
+  // Verify that all SUnits were scheduled.
+  bool AnyNotSched = false;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(&DAG);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+  }
+  assert(!AnyNotSched);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//                    LatencyPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+// 
+namespace {
+  class LatencyPriorityQueue;
+  
+  /// Sorting functions for the Available queue.
+  struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    LatencyPriorityQueue *PQ;
+    latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
+    latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+}  // end anonymous namespace
+
+namespace {
+  class LatencyPriorityQueue : public SchedulingPriorityQueue {
+    // SUnits - The SUnits for the current graph.
+    std::vector<SUnit> *SUnits;
+    
+    // Latencies - The latency (max of latency from this node to the bb exit)
+    // for each node.
+    std::vector<int> Latencies;
+
+    /// NumNodesSolelyBlocking - This vector contains, for every node in the
+    /// Queue, the number of nodes that the node is the sole unscheduled
+    /// predecessor for.  This is used as a tie-breaker heuristic for better
+    /// mobility.
+    std::vector<unsigned> NumNodesSolelyBlocking;
+
+    std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
+public:
+    LatencyPriorityQueue() : Queue(latency_sort(this)) {
+    }
+    
+    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                   std::vector<SUnit> &sunits) {
+      SUnits = &sunits;
+      // Calculate node priorities.
+      CalculatePriorities();
+    }
+    void releaseState() {
+      SUnits = 0;
+      Latencies.clear();
+    }
+    
+    unsigned getLatency(unsigned NodeNum) const {
+      assert(NodeNum < Latencies.size());
+      return Latencies[NodeNum];
+    }
+    
+    unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
+      assert(NodeNum < NumNodesSolelyBlocking.size());
+      return NumNodesSolelyBlocking[NodeNum];
+    }
+    
+    bool empty() const { return Queue.empty(); }
+    
+    virtual void push(SUnit *U) {
+      push_impl(U);
+    }
+    void push_impl(SUnit *U);
+    
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+        push_impl(Nodes[i]);
+    }
+    
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.top();
+      Queue.pop();
+      return V;
+    }
+
+    // ScheduledNode - As nodes are scheduled, we look to see if there are any
+    // successor nodes that have a single unscheduled predecessor.  If so, that
+    // single predecessor has a higher priority, since scheduling it will make
+    // the node available.
+    void ScheduledNode(SUnit *Node);
+
+private:
+    void CalculatePriorities();
+    int CalcLatency(const SUnit &SU);
+    void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
+    SUnit *getSingleUnscheduledPred(SUnit *SU);
+
+    /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
+    /// node from a priority queue.  We should roll our own heap to make this
+    /// better or something.
+    void RemoveFromPriorityQueue(SUnit *SU) {
+      std::vector<SUnit*> Temp;
+      
+      assert(!Queue.empty() && "Not in queue!");
+      while (Queue.top() != SU) {
+        Temp.push_back(Queue.top());
+        Queue.pop();
+        assert(!Queue.empty() && "Not in queue!");
+      }
+
+      // Remove the node from the PQ.
+      Queue.pop();
+      
+      // Add all the other nodes back.
+      for (unsigned i = 0, e = Temp.size(); i != e; ++i)
+        Queue.push(Temp[i]);
+    }
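+
+    // A usage sketch with hypothetical queue contents: if Queue holds
+    // {A, B, SU, C} in priority order, removing SU pops A and B into Temp,
+    // pops SU itself, then pushes A and B back -- O(k log n) work for an
+    // element at depth k, which is what the comment above is complaining
+    // about.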
+  };
+}
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+  unsigned LHSNum = LHS->NodeNum;
+  unsigned RHSNum = RHS->NodeNum;
+
+  // The most important heuristic is scheduling the critical path.
+  unsigned LHSLatency = PQ->getLatency(LHSNum);
+  unsigned RHSLatency = PQ->getLatency(RHSNum);
+  if (LHSLatency < RHSLatency) return true;
+  if (LHSLatency > RHSLatency) return false;
+  
+  // After that, if two nodes have identical latencies, look to see if one will
+  // unblock more other nodes than the other.
+  unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+  unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+  if (LHSBlocked < RHSBlocked) return true;
+  if (LHSBlocked > RHSBlocked) return false;
+  
+  // Finally, just to provide a stable ordering, use the node number as a
+  // deciding factor.
+  return LHSNum < RHSNum;
+}
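+
+// A hypothetical comparison to make the ordering concrete: for SU #3 with
+// latency 7 and SU #5 with latency 4, operator()(SU3, SU5) is false and
+// operator()(SU5, SU3) is true, so the priority_queue surfaces SU #3 first
+// (the comparator returns true when the left node should sort below the
+// right one).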
+
+
+/// CalcNodePriority - Calculate the maximal path from the node to the exit.
+///
+int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
+  int &Latency = Latencies[SU.NodeNum];
+  if (Latency != -1)
+    return Latency;
+  
+  int MaxSuccLatency = 0;
+  for (SUnit::const_succ_iterator I = SU.Succs.begin(), E = SU.Succs.end();
+       I != E; ++I)
+    MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first));
+
+  return Latency = MaxSuccLatency + SU.Latency;
+}
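+
+// For example, with made-up latencies: in a chain A -> B where B has no
+// successors, A.Latency = 2 and B.Latency = 3, CalcLatency(B) = 0 + 3 = 3
+// and CalcLatency(A) = 3 + 2 = 5, marking A as the head of the critical
+// path.  Memoizing through Latencies[] keeps the walk linear in the edges.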
+
+/// CalculatePriorities - Calculate priorities of all scheduling units.
+void LatencyPriorityQueue::CalculatePriorities() {
+  Latencies.assign(SUnits->size(), -1);
+  NumNodesSolelyBlocking.assign(SUnits->size(), 0);
+  
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcLatency((*SUnits)[i]);
+}
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+  SUnit *OnlyAvailablePred = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    SUnit &Pred = *I->first;
+    if (!Pred.isScheduled) {
+      // We found an available, but not scheduled, predecessor.  If it's the
+      // only one we have found, keep track of it... otherwise give up.
+      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+        return 0;
+      OnlyAvailablePred = &Pred;
+    }
+  }
+      
+  return OnlyAvailablePred;
+}
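+
+// Usage sketch (hypothetical caller): a non-null result names the one node
+// whose scheduling would unblock SU, e.g.
+//   if (getSingleUnscheduledPred(SuccSU) == SU)
+//     ++NumNodesBlocking;   // SU solely blocks SuccSU
+// which is exactly the query push_impl below performs over SU->Succs.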
+
+void LatencyPriorityQueue::push_impl(SUnit *SU) {
+  // Look at all of the successors of this node.  Count the number of
+  // successors for which this node is the sole unscheduled predecessor.
+  unsigned NumNodesBlocking = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    if (getSingleUnscheduledPred(I->first) == SU)
+      ++NumNodesBlocking;
+  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+  
+  Queue.push(SU);
+}
+
+
+// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor.  If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    AdjustPriorityOfUnscheduledPreds(I->first);
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled.  If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet.  If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+  if (SU->isPending) return;  // All preds scheduled.
+  
+  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+  if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+  
+  // Okay, we found a single predecessor that is available, but not scheduled.
+  // Since it is available, it must be in the priority queue.  First remove it.
+  RemoveFromPriorityQueue(OnlyAvailablePred);
+
+  // Reinsert the node into the priority queue, which recomputes its
+  // NumNodesSolelyBlocking value.
+  push(OnlyAvailablePred);
+}
+
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler with a
+/// new hazard recognizer. This scheduler takes ownership of the hazard
+/// recognizer and deletes it when done.
+ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAGISel *IS,
+                                            SelectionDAG *DAG,
+                                            MachineBasicBlock *BB) {
+  return new ScheduleDAGList(*DAG, BB, DAG->getTarget(),
+                             new LatencyPriorityQueue(),
+                             IS->CreateTargetHazardRecognizer());
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000..f95be7d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,944 @@
+//===--- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms.  The basic approach uses a priority
+// queue of available nodes to schedule.  One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+#include <queue>
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static RegisterScheduler
+  burrListDAGScheduler("list-burr",
+                       "  Bottom-up register reduction list scheduling",
+                       createBURRListDAGScheduler);
+static RegisterScheduler
+  tdrListrDAGScheduler("list-tdrr",
+                       "  Top-down register reduction list scheduling",
+                       createTDRRListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation.  This supports both top-down and bottom-up scheduling.
+///
+
+class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
+private:
+  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+  /// it is top-down.
+  bool isBottomUp;
+  
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  SchedulingPriorityQueue *AvailableQueue;
+
+public:
+  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
+                  const TargetMachine &tm, bool isbottomup,
+                  SchedulingPriorityQueue *availqueue)
+    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
+      AvailableQueue(availqueue) {
+    }
+
+  ~ScheduleDAGRRList() {
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+private:
+  void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
+  void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle);
+  void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle);
+  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void ListScheduleTopDown();
+  void ListScheduleBottomUp();
+  void CommuteNodesToReducePressure();
+};
+}  // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+  DOUT << "********** List Scheduling **********\n";
+  
+  // Build scheduling units.
+  BuildSchedUnits();
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(&DAG));
+  CalculateDepths();
+  CalculateHeights();
+
+  AvailableQueue->initNodes(SUnitMap, SUnits);
+
+  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+  if (isBottomUp)
+    ListScheduleBottomUp();
+  else
+    ListScheduleTopDown();
+  
+  AvailableQueue->releaseState();
+
+  CommuteNodesToReducePressure();
+  
+  DOUT << "*** Final schedule ***\n";
+  DEBUG(dumpSchedule());
+  DOUT << "\n";
+  
+  // Emit in scheduled order
+  EmitSchedule();
+}
+
+/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
+/// it is not the last use of its first operand, add it to the CommuteSet if
+/// possible. It will be commuted when it is translated to an MI.
+void ScheduleDAGRRList::CommuteNodesToReducePressure() {
+  SmallPtrSet<SUnit*, 4> OperandSeen;
+  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
+    SUnit *SU = Sequence[i];
+    if (!SU) continue;
+    if (SU->isCommutable) {
+      unsigned Opc = SU->Node->getTargetOpcode();
+      unsigned NumRes = CountResults(SU->Node);
+      unsigned NumOps = CountOperands(SU->Node);
+      for (unsigned j = 0; j != NumOps; ++j) {
+        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
+          continue;
+
+        SDNode *OpN = SU->Node->getOperand(j).Val;
+        SUnit *OpSU = SUnitMap[OpN];
+        if (OpSU && OperandSeen.count(OpSU) == 1) {
+          // Ok, so SU is not the last use of OpSU, but SU is two-address so
+          // it will clobber OpSU. Try to commute SU if no other source operands
+          // are live below.
+          bool DoCommute = true;
+          for (unsigned k = 0; k < NumOps; ++k) {
+            if (k != j) {
+              OpN = SU->Node->getOperand(k).Val;
+              OpSU = SUnitMap[OpN];
+              if (OpSU && OperandSeen.count(OpSU) == 1) {
+                DoCommute = false;
+                break;
+              }
+            }
+          }
+          if (DoCommute)
+            CommuteSet.insert(SU->Node);
+        }
+
+        // Only look at the first use&def node for now.
+        break;
+      }
+    }
+
+    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (!I->second)
+        OperandSeen.insert(I->first);
+    }
+  }
+}
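+
+// A hypothetical two-address case for the pass above: scanning bottom-up
+// over
+//   t1 = ADD t0, t2    // t1 tied to t0, so the ADD clobbers t0
+// if t0's defining node has already been seen as an operand of a later
+// instruction but t2's has not, the ADD lands in CommuteSet and is emitted
+// as t1 = ADD t2, t0, clobbering t2 (dead below) instead of the live t0.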
+
+//===----------------------------------------------------------------------===//
+//  Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the Available queue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain, 
+                                    unsigned CurCycle) {
+  // FIXME: the distance between two nodes is not always == the predecessor's
+  // latency. For example, the reader can very well read the register written
+  // by the predecessor later than the issue cycle. It also depends on the
+  // interrupt model (drain vs. freeze).
+  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);
+
+  if (!isChain)
+    PredSU->NumSuccsLeft--;
+  else
+    PredSU->NumChainSuccsLeft--;
+  
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
+    cerr << "*** List scheduling failed! ***\n";
+    PredSU->dump(&DAG);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+  
+  if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
+    // EntryToken has to go last!  Special case it here.
+    if (PredSU->Node->getOpcode() != ISD::EntryToken) {
+      PredSU->isAvailable = true;
+      AvailableQueue->push(PredSU);
+    }
+  }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(&DAG));
+  SU->Cycle = CurCycle;
+
+  AvailableQueue->ScheduledNode(SU);
+  Sequence.push_back(SU);
+
+  // Bottom up: release predecessors
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I)
+    ReleasePred(I->first, I->second, CurCycle);
+  SU->isScheduled = true;
+}
+
+/// isReady - True if the node's lower cycle bound is less than or equal to
+/// the current scheduling cycle. Always true if all nodes have uniform
+/// latency 1.
+static inline bool isReady(SUnit *SU, unsigned CurCycle) {
+  return SU->CycleBound <= CurCycle;
+}
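+
+// For instance (hypothetical bound): a node with CycleBound = 3 popped at
+// CurCycle = 1 is parked on the NotReady list and retried; once CurCycle
+// reaches 3 it passes isReady and can be scheduled.  With uniform latency 1
+// the bound never exceeds the cycle, so nothing is ever rejected.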
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+  unsigned CurCycle = 0;
+  // Add root to Available queue.
+  AvailableQueue->push(SUnitMap[DAG.getRoot().Val]);
+
+  // While the Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready, put it back. Schedule the node.
+  std::vector<SUnit*> NotReady;
+  while (!AvailableQueue->empty()) {
+    SUnit *CurNode = AvailableQueue->pop();
+    while (CurNode && !isReady(CurNode, CurCycle)) {
+      NotReady.push_back(CurNode);
+      CurNode = AvailableQueue->pop();
+    }
+    
+    // Add the nodes that aren't ready back onto the available list.
+    AvailableQueue->push_all(NotReady);
+    NotReady.clear();
+
+    if (CurNode != NULL)
+      ScheduleNodeBottomUp(CurNode, CurCycle);
+    CurCycle++;
+  }
+
+  // Add entry node last
+  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
+    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+    Sequence.push_back(Entry);
+  }
+
+  // Reverse the order since this is a bottom-up schedule.
+  std::reverse(Sequence.begin(), Sequence.end());
+  
+  
+#ifndef NDEBUG
+  // Verify that all SUnits were scheduled.
+  bool AnyNotSched = false;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(&DAG);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+  }
+  assert(!AnyNotSched);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain, 
+                                    unsigned CurCycle) {
+  // FIXME: the distance between two nodes is not always == the predecessor's
+  // latency. For example, the reader can very well read the register written
+  // by the predecessor later than the issue cycle. It also depends on the
+  // interrupt model (drain vs. freeze).
+  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);
+
+  if (!isChain)
+    SuccSU->NumPredsLeft--;
+  else
+    SuccSU->NumChainPredsLeft--;
+  
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) {
+    cerr << "*** List scheduling failed! ***\n";
+    SuccSU->dump(&DAG);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+  
+  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
+    SuccSU->isAvailable = true;
+    AvailableQueue->push(SuccSU);
+  }
+}
+
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(&DAG));
+  SU->Cycle = CurCycle;
+
+  AvailableQueue->ScheduledNode(SU);
+  Sequence.push_back(SU);
+
+  // Top down: release successors
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    ReleaseSucc(I->first, I->second, CurCycle);
+  SU->isScheduled = true;
+}
+
+void ScheduleDAGRRList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+
+  // All leaves to Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
+      AvailableQueue->push(&SUnits[i]);
+      SUnits[i].isAvailable = true;
+    }
+  }
+  
+  // Emit the entry node first.
+  ScheduleNodeTopDown(Entry, CurCycle);
+  CurCycle++;
+
+  // While the Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready, put it back. Schedule the node.
+  std::vector<SUnit*> NotReady;
+  while (!AvailableQueue->empty()) {
+    SUnit *CurNode = AvailableQueue->pop();
+    while (CurNode && !isReady(CurNode, CurCycle)) {
+      NotReady.push_back(CurNode);
+      CurNode = AvailableQueue->pop();
+    }
+    
+    // Add the nodes that aren't ready back onto the available list.
+    AvailableQueue->push_all(NotReady);
+    NotReady.clear();
+
+    if (CurNode != NULL)
+      ScheduleNodeTopDown(CurNode, CurCycle);
+    CurCycle++;
+  }
+  
+  
+#ifndef NDEBUG
+  // Verify that all SUnits were scheduled.
+  bool AnyNotSched = false;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (!SUnits[i].isScheduled) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(&DAG);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+  }
+  assert(!AnyNotSched);
+#endif
+}
+
+
+
+//===----------------------------------------------------------------------===//
+//                RegReductionPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+// 
+namespace {
+  template<class SF>
+  class RegReductionPriorityQueue;
+  
+  /// Sorting functions for the Available queue.
+  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
+    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
+    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+
+  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
+    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
+    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+    
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+}  // end anonymous namespace
+
+static inline bool isCopyFromLiveIn(const SUnit *SU) {
+  SDNode *N = SU->Node;
+  return N->getOpcode() == ISD::CopyFromReg &&
+    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
+}
+
+namespace {
+  template<class SF>
+  class VISIBILITY_HIDDEN RegReductionPriorityQueue
+   : public SchedulingPriorityQueue {
+    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;
+
+  public:
+    RegReductionPriorityQueue() : Queue(SF(this)) {}
+    
+    virtual void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                           std::vector<SUnit> &sunits) {}
+    virtual void releaseState() {}
+    
+    virtual unsigned getNodePriority(const SUnit *SU) const {
+      return 0;
+    }
+    
+    bool empty() const { return Queue.empty(); }
+    
+    void push(SUnit *U) {
+      Queue.push(U);
+    }
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+        Queue.push(Nodes[i]);
+    }
+    
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.top();
+      Queue.pop();
+      return V;
+    }
+
+    virtual bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
+      return false;
+    }
+  };
+
+  template<class SF>
+  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
+   : public RegReductionPriorityQueue<SF> {
+    // SUnitMap - SDNode to SUnit mapping (n -> 1).
+    DenseMap<SDNode*, SUnit*> *SUnitMap;
+
+    // SUnits - The SUnits for the current graph.
+    const std::vector<SUnit> *SUnits;
+    
+    // SethiUllmanNumbers - The SethiUllman number for each node.
+    std::vector<unsigned> SethiUllmanNumbers;
+
+    const TargetInstrInfo *TII;
+  public:
+    BURegReductionPriorityQueue(const TargetInstrInfo *tii)
+      : TII(tii) {}
+
+    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                   std::vector<SUnit> &sunits) {
+      SUnitMap = &sumap;
+      SUnits = &sunits;
+      // Add pseudo dependency edges for two-address nodes.
+      AddPseudoTwoAddrDeps();
+      // Calculate node priorities.
+      CalculateSethiUllmanNumbers();
+    }
+
+    void releaseState() {
+      SUnits = 0;
+      SethiUllmanNumbers.clear();
+    }
+
+    unsigned getNodePriority(const SUnit *SU) const {
+      assert(SU->NodeNum < SethiUllmanNumbers.size());
+      unsigned Opc = SU->Node->getOpcode();
+      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
+        // CopyFromReg should be close to its def because it restricts
+        // allocation choices. But if it is a livein then perhaps we want it
+        // closer to its uses so it can be coalesced.
+        return 0xffff;
+      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+        // CopyToReg should be close to its uses to facilitate coalescing and
+        // avoid spilling.
+        return 0;
+      else if (SU->NumSuccs == 0)
+        // If SU does not have a use, i.e. it doesn't produce a value that would
+        // be consumed (e.g. store), then it terminates a chain of computation.
+        // Give it a large SethiUllman number so it will be scheduled right
+        // before its predecessors, so that it doesn't lengthen their live
+        // ranges.
+        return 0xffff;
+      else if (SU->NumPreds == 0)
+        // If SU does not have a def, schedule it close to its uses because it
+        // does not lengthen any live ranges.
+        return 0;
+      else
+        return SethiUllmanNumbers[SU->NodeNum];
+    }
+
+    bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
+      unsigned Opc = SU1->Node->getTargetOpcode();
+      unsigned NumRes = ScheduleDAG::CountResults(SU1->Node);
+      unsigned NumOps = ScheduleDAG::CountOperands(SU1->Node);
+      for (unsigned i = 0; i != NumOps; ++i) {
+        if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) == -1)
+          continue;
+        if (SU1->Node->getOperand(i).isOperand(SU2->Node))
+          return true;
+      }
+      return false;
+    }
+  private:
+    bool canClobber(SUnit *SU, SUnit *Op);
+    void AddPseudoTwoAddrDeps();
+    void CalculateSethiUllmanNumbers();
+    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
+  };
+
+
+  template<class SF>
+  class TDRegReductionPriorityQueue : public RegReductionPriorityQueue<SF> {
+    // SUnitMap - SDNode to SUnit mapping (n -> 1).
+    DenseMap<SDNode*, SUnit*> *SUnitMap;
+
+    // SUnits - The SUnits for the current graph.
+    const std::vector<SUnit> *SUnits;
+    
+    // SethiUllmanNumbers - The SethiUllman number for each node.
+    std::vector<unsigned> SethiUllmanNumbers;
+
+  public:
+    TDRegReductionPriorityQueue() {}
+
+    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                   std::vector<SUnit> &sunits) {
+      SUnitMap = &sumap;
+      SUnits = &sunits;
+      // Calculate node priorities.
+      CalculateSethiUllmanNumbers();
+    }
+
+    void releaseState() {
+      SUnits = 0;
+      SethiUllmanNumbers.clear();
+    }
+
+    unsigned getNodePriority(const SUnit *SU) const {
+      assert(SU->NodeNum < SethiUllmanNumbers.size());
+      return SethiUllmanNumbers[SU->NodeNum];
+    }
+
+  private:
+    void CalculateSethiUllmanNumbers();
+    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
+  };
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+  unsigned MaxCycle = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    unsigned Cycle = I->first->Cycle;
+    // If there is a bunch of CopyToRegs stacked up, they should be considered
+    // to be at the same position.
+    if (I->first->Node->getOpcode() == ISD::CopyToReg)
+      Cycle = closestSucc(I->first)+1;
+    if (Cycle > MaxCycle)
+      MaxCycle = Cycle;
+  }
+  return MaxCycle;
+}
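+
+// Made-up illustration of the copy handling: if SU feeds SU2 scheduled at
+// cycle 4 and also a chain of copies CTR1 -> CTR2 whose real consumer sits
+// at cycle 6, the CopyToRegs are looked through recursively, so the result
+// is max(4, (6 + 1) + 1) = 8.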
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers. Live-in operands and live-out results don't count
+/// since they are "fixed".
+static unsigned calcMaxScratches(const SUnit *SU) {
+  unsigned Scratches = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->second) continue;  // ignore chain preds
+    if (I->first->Node->getOpcode() != ISD::CopyFromReg)
+      Scratches++;
+  }
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->second) continue;  // ignore chain succs
+    if (I->first->Node->getOpcode() != ISD::CopyToReg)
+      Scratches += 10;
+  }
+  return Scratches;
+}
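+
+// Worked arithmetic for a hypothetical node: with two non-chain preds, one
+// of them a CopyFromReg, the pred loop scores 1; with two non-chain succs,
+// neither a CopyToReg, the succ loop scores 2 * 10 = 20; total estimate 21.
+// The 10x weight on non-copy successors is a heuristic constant, not a
+// measured register count.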
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  // There used to be a special tie breaker here that looked for
+  // two-address instructions and preferred the instruction with a
+  // def&use operand.  The special case triggered diagnostics when
+  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
+  // ordering that priority_queue requires. It didn't help much anyway
+  // because AddPseudoTwoAddrDeps already covers many of the cases
+  // where it would have applied.  In addition, it's counter-intuitive
+  // that a tie breaker would be the first thing attempted.  There's a
+  // "real" tie breaker below that is the operation of last resort.
+  // The fact that the "special tie breaker" would trigger when there
+  // wasn't otherwise a tie is what broke the strict weak ordering
+  // constraint.
+
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  if (LPriority > RPriority)
+    return true;
+  else if (LPriority == RPriority) {
+    // Try schedule def + use closer when Sethi-Ullman numbers are the same.
+    // e.g.
+    // t1 = op t2, c1
+    // t3 = op t4, c2
+    //
+    // and the following instructions are both ready.
+    // t2 = op c3
+    // t4 = op c4
+    //
+    // Then schedule t2 = op first.
+    // i.e.
+    // t4 = op c4
+    // t2 = op c3
+    // t1 = op t2, c1
+    // t3 = op t4, c2
+    //
+    // This creates more short live intervals.
+    unsigned LDist = closestSucc(left);
+    unsigned RDist = closestSucc(right);
+    if (LDist < RDist)
+      return true;
+    else if (LDist == RDist) {
+      // Intuitively, it's good to push down instructions whose results are
+      // liveout so their long live ranges won't conflict with other values
+      // which are needed inside the BB. Further prioritize liveout instructions
+      // by the number of operands which are calculated within the BB.
+      unsigned LScratch = calcMaxScratches(left);
+      unsigned RScratch = calcMaxScratches(right);
+      if (LScratch > RScratch)
+        return true;
+      else if (LScratch == RScratch)
+        if (left->Height > right->Height)
+          return true;
+        else if (left->Height == right->Height)
+          if (left->Depth < right->Depth)
+            return true;
+          else if (left->Depth == right->Depth)
+            if (left->CycleBound > right->CycleBound) 
+              return true;
+    }
+  }
+  return false;
+}
+
+// FIXME: This is probably too slow!
+static void isReachable(SUnit *SU, SUnit *TargetSU,
+                        SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) {
+  if (Reached) return;
+  if (SU == TargetSU) {
+    Reached = true;
+    return;
+  }
+  if (!Visited.insert(SU)) return;
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
+       ++I)
+    isReachable(I->first, TargetSU, Visited, Reached);
+}
+
+static bool isReachable(SUnit *SU, SUnit *TargetSU) {
+  SmallPtrSet<SUnit*, 32> Visited;
+  bool Reached = false;
+  isReachable(SU, TargetSU, Visited, Reached);
+  return Reached;
+}
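+
+// Usage sketch: AddPseudoTwoAddrDeps below checks !isReachable(SuccSU, SU)
+// before making SuccSU a predecessor of SU.  Since the walk follows Preds
+// transitively, a true result means SU is already an ancestor of SuccSU, and
+// the new edge would close a cycle.  Each query can touch O(V + E) nodes,
+// hence the FIXME.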
+
+template<class SF>
+bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
+  if (SU->isTwoAddress) {
+    unsigned Opc = SU->Node->getTargetOpcode();
+    unsigned NumRes = ScheduleDAG::CountResults(SU->Node);
+    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
+    for (unsigned i = 0; i != NumOps; ++i) {
+      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
+        SDNode *DU = SU->Node->getOperand(i).Val;
+        if (Op == (*SUnitMap)[DU])
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule).
+template<class SF>
+void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = (SUnit *)&((*SUnits)[i]);
+    if (!SU->isTwoAddress)
+      continue;
+
+    SDNode *Node = SU->Node;
+    if (!Node->isTargetOpcode())
+      continue;
+
+    unsigned Opc = Node->getTargetOpcode();
+    unsigned NumRes = ScheduleDAG::CountResults(Node);
+    unsigned NumOps = ScheduleDAG::CountOperands(Node);
+    for (unsigned j = 0; j != NumOps; ++j) {
+      if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
+        SDNode *DU = SU->Node->getOperand(j).Val;
+        SUnit *DUSU = (*SUnitMap)[DU];
+        if (!DUSU) continue;
+        for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
+             I != E; ++I) {
+          if (I->second) continue;
+          SUnit *SuccSU = I->first;
+          if (SuccSU != SU &&
+              (!canClobber(SuccSU, DUSU) ||
+               (!SU->isCommutable && SuccSU->isCommutable))){
+            if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) {
+              DOUT << "Adding an edge from SU # " << SU->NodeNum
+                   << " to SU #" << SuccSU->NodeNum << "\n";
+              if (SU->addPred(SuccSU, true))
+                SU->NumChainPredsLeft++;
+              if (SuccSU->addSucc(SU, true))
+                SuccSU->NumChainSuccsLeft++;
+            }
+          }
+        }
+      }
+    }
+  }
+}
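+
+// Hypothetical scenario for the pass above: SU is "t1 = ADD t0, t2" with t0
+// tied (def&use), and SuccSU is another, untied reader of t0.  The added
+// chain edge makes SuccSU a predecessor of SU, so the final schedule reads
+// t0 in SuccSU before the ADD clobbers it.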
+
+/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 
+/// Smaller number is the higher priority.
+template<class SF>
+unsigned BURegReductionPriorityQueue<SF>::
+CalcNodeSethiUllmanNumber(const SUnit *SU) {
+  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
+  if (SethiUllmanNumber != 0)
+    return SethiUllmanNumber;
+
+  unsigned Extra = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->second) continue;  // ignore chain preds
+    SUnit *PredSU = I->first;
+    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
+    if (PredSethiUllman > SethiUllmanNumber) {
+      SethiUllmanNumber = PredSethiUllman;
+      Extra = 0;
+    } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
+      Extra++;
+  }
+
+  SethiUllmanNumber += Extra;
+
+  if (SethiUllmanNumber == 0)
+    SethiUllmanNumber = 1;
+  
+  return SethiUllmanNumber;
+}
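+
+// A small worked case on an invented DAG: a leaf gets number 1 (the floor
+// above).  A node whose two non-chain preds both computed 1 takes the max,
+// 1, plus Extra = 1 for the tie, yielding 2 -- the classic Sethi-Ullman
+// growth when both subtrees demand the same number of registers.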
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+  SethiUllmanNumbers.assign(SUnits->size(), 0);
+  
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
+}
+
+static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
+  unsigned Sum = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    SUnit *SuccSU = I->first;
+    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
+         EE = SuccSU->Preds.end(); II != EE; ++II) {
+      SUnit *PredSU = II->first;
+      if (!PredSU->isScheduled)
+        Sum++;
+    }
+  }
+
+  return Sum;
+}
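+
+// Worked example (hypothetical): SU has a single successor X whose preds are
+// {SU, P}.  With P scheduled and SU not, the sum is 1 -- the case
+// td_ls_rr_sort below rewards with a +2 bonus, since scheduling SU makes X
+// immediately available.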
+
+
+// Top down
+bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  bool LIsTarget = left->Node->isTargetOpcode();
+  bool RIsTarget = right->Node->isTargetOpcode();
+  bool LIsFloater = LIsTarget && left->NumPreds == 0;
+  bool RIsFloater = RIsTarget && right->NumPreds == 0;
+  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
+  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;
+
+  if (left->NumSuccs == 0 && right->NumSuccs != 0)
+    return false;
+  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+    return true;
+
+  // Special tie breaker: if two nodes share an operand, the one that uses it
+  // as a def&use operand is preferred.
+  if (LIsTarget && RIsTarget) {
+    if (left->isTwoAddress && !right->isTwoAddress) {
+      SDNode *DUNode = left->Node->getOperand(0).Val;
+      if (DUNode->isOperand(right->Node))
+        RBonus += 2;
+    }
+    if (!left->isTwoAddress && right->isTwoAddress) {
+      SDNode *DUNode = right->Node->getOperand(0).Val;
+      if (DUNode->isOperand(left->Node))
+        LBonus += 2;
+    }
+  }
+  if (LIsFloater)
+    LBonus -= 2;
+  if (RIsFloater)
+    RBonus -= 2;
+  if (left->NumSuccs == 1)
+    LBonus += 2;
+  if (right->NumSuccs == 1)
+    RBonus += 2;
+
+  if (LPriority+LBonus < RPriority+RBonus)
+    return true;
+  else if (LPriority == RPriority)
+    if (left->Depth < right->Depth)
+      return true;
+    else if (left->Depth == right->Depth)
+      if (left->NumSuccsLeft > right->NumSuccsLeft)
+        return true;
+      else if (left->NumSuccsLeft == right->NumSuccsLeft)
+        if (left->CycleBound > right->CycleBound) 
+          return true;
+  return false;
+}
+
+/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 
+/// Smaller number is the higher priority.
+template<class SF>
+unsigned TDRegReductionPriorityQueue<SF>::
+CalcNodeSethiUllmanNumber(const SUnit *SU) {
+  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
+  if (SethiUllmanNumber != 0)
+    return SethiUllmanNumber;
+
+  unsigned Opc = SU->Node->getOpcode();
+  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+    SethiUllmanNumber = 0xffff;
+  else if (SU->NumSuccsLeft == 0)
+    // If SU does not have a use, i.e. it doesn't produce a value that would
+    // be consumed (e.g. store), then it terminates a chain of computation.
+    // Give it a small SethiUllman number so it will be scheduled right before
+    // its predecessors, so that it doesn't lengthen their live ranges.
+    SethiUllmanNumber = 0;
+  else if (SU->NumPredsLeft == 0 &&
+           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
+    SethiUllmanNumber = 0xffff;
+  else {
+    int Extra = 0;
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (I->second) continue;  // ignore chain preds
+      SUnit *PredSU = I->first;
+      unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
+      if (PredSethiUllman > SethiUllmanNumber) {
+        SethiUllmanNumber = PredSethiUllman;
+        Extra = 0;
+      } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
+        Extra++;
+    }
+
+    SethiUllmanNumber += Extra;
+  }
+  
+  return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+  SethiUllmanNumbers.assign(SUnits->size(), 0);
+  
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+                                                    SelectionDAG *DAG,
+                                                    MachineBasicBlock *BB) {
+  const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo();
+  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
+                           new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII));
+}
+
+llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+                                                    SelectionDAG *DAG,
+                                                    MachineBasicBlock *BB) {
+  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false,
+                              new TDRegReductionPriorityQueue<td_ls_rr_sort>());
+}
+
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp
new file mode 100644
index 0000000..62854f7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp
@@ -0,0 +1,1156 @@
+//===-- ScheduleDAGSimple.cpp - Implement a trivial DAG scheduler ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple two pass scheduler.  The first pass attempts to push
+// backward any lengthy instructions and critical paths.  The second pass packs
+// instructions into semi-optimal time slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+static RegisterScheduler
+  bfsDAGScheduler("none", "  No scheduling: breadth first sequencing",
+                  createBFS_DAGScheduler);
+static RegisterScheduler
+  simpleDAGScheduler("simple",
+                     "  Simple two pass scheduling: minimize critical path "
+                     "and maximize processor utilization",
+                      createSimpleDAGScheduler);
+static RegisterScheduler
+  noitinDAGScheduler("simple-noitin",
+                     "  Simple two pass scheduling: Same as simple "
+                     "except using generic latency",
+                     createNoItinsDAGScheduler);
+                     
+class NodeInfo;
+typedef NodeInfo *NodeInfoPtr;
+typedef std::vector<NodeInfoPtr>           NIVector;
+typedef std::vector<NodeInfoPtr>::iterator NIIterator;
+
+//===--------------------------------------------------------------------===//
+///
+/// Node group -  This struct is used to manage flagged node groups.
+///
+class NodeGroup {
+public:
+  NodeGroup     *Next;
+private:
+  NIVector      Members;                // Group member nodes
+  NodeInfo      *Dominator;             // Node with highest latency
+  unsigned      Latency;                // Total latency of the group
+  int           Pending;                // Number of visits pending before
+                                        // adding to order  
+
+public:
+  // Ctor.
+  NodeGroup() : Next(NULL), Dominator(NULL), Latency(0), Pending(0) {}
+
+  // Accessors
+  inline void setDominator(NodeInfo *D) { Dominator = D; }
+  inline NodeInfo *getTop() { return Members.front(); }
+  inline NodeInfo *getBottom() { return Members.back(); }
+  inline NodeInfo *getDominator() { return Dominator; }
+  inline void setLatency(unsigned L) { Latency = L; }
+  inline unsigned getLatency() { return Latency; }
+  inline int getPending() const { return Pending; }
+  inline void setPending(int P)  { Pending = P; }
+  inline int addPending(int I)  { return Pending += I; }
+
+  // Pass thru
+  inline bool group_empty() { return Members.empty(); }
+  inline NIIterator group_begin() { return Members.begin(); }
+  inline NIIterator group_end() { return Members.end(); }
+  inline void group_push_back(const NodeInfoPtr &NI) {
+    Members.push_back(NI);
+  }
+  inline NIIterator group_insert(NIIterator Pos, const NodeInfoPtr &NI) {
+    return Members.insert(Pos, NI);
+  }
+  inline void group_insert(NIIterator Pos, NIIterator First,
+                           NIIterator Last) {
+    Members.insert(Pos, First, Last);
+  }
+
+  static void Add(NodeInfo *D, NodeInfo *U);
+};
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeInfo - This struct tracks information used to schedule a node.
+///
+class NodeInfo {
+private:
+  int           Pending;                // Number of visits pending before
+                                        // adding to order
+public:
+  SDNode        *Node;                  // DAG node
+  InstrStage    *StageBegin;            // First stage in itinerary
+  InstrStage    *StageEnd;              // Last+1 stage in itinerary
+  unsigned      Latency;                // Total cycles to complete instr
+  bool          IsCall : 1;             // Is function call
+  bool          IsLoad : 1;             // Is memory load
+  bool          IsStore : 1;            // Is memory store
+  unsigned      Slot;                   // Node's time slot
+  NodeGroup     *Group;                 // Grouping information
+#ifndef NDEBUG
+  unsigned      Preorder;               // Index before scheduling
+#endif
+
+  // Ctor.
+  NodeInfo(SDNode *N = NULL)
+    : Pending(0)
+    , Node(N)
+    , StageBegin(NULL)
+    , StageEnd(NULL)
+    , Latency(0)
+    , IsCall(false)
+    , IsLoad(false)
+    , IsStore(false)
+    , Slot(0)
+    , Group(NULL)
+#ifndef NDEBUG
+    , Preorder(0)
+#endif
+  {}
+
+  // Accessors
+  inline bool isInGroup() const {
+    assert((!Group || !Group->group_empty()) && "Group with no members");
+    return Group != NULL;
+  }
+  inline bool isGroupDominator() const {
+    return isInGroup() && Group->getDominator() == this;
+  }
+  inline int getPending() const {
+    return Group ? Group->getPending() : Pending;
+  }
+  inline void setPending(int P) {
+    if (Group) Group->setPending(P);
+    else       Pending = P;
+  }
+  inline int addPending(int I) {
+    if (Group) return Group->addPending(I);
+    else       return Pending += I;
+  }
+};
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeGroupIterator - Iterates over all the nodes indicated by the node
+/// info. If the node is in a group then iterate over the members of the
+/// group, otherwise just the node info.
+///
+class NodeGroupIterator {
+private:
+  NodeInfo   *NI;                       // Node info
+  NIIterator NGI;                       // Node group iterator
+  NIIterator NGE;                       // Node group iterator end
+
+public:
+  // Ctor.
+  NodeGroupIterator(NodeInfo *N) : NI(N), NGI(), NGE() {
+    // If the node is in a group then set up the group iterators.  Otherwise
+    // the group iterators compare equal and next() falls through to the node
+    // itself.
+    if (N->isInGroup()) {
+      // Get the node's group
+      NodeGroup *Group = NI->Group;
+      NGI = Group->group_begin();
+      NGE = Group->group_end();
+      // Prevent this node from being used directly (it appears in the
+      // members list)
+      NI = NULL;
+    }
+  }
+
+  /// next - Return the next node info, otherwise NULL.
+  ///
+  NodeInfo *next() {
+    // If members list
+    if (NGI != NGE) return *NGI++;
+    // Use node as the result (may be NULL)
+    NodeInfo *Result = NI;
+    // Only use once
+    NI = NULL;
+    // Return node or NULL
+    return Result;
+  }
+};
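+// Usage sketch (illustrative, not part of the interface): walk every node
+// covered by a NodeInfo, grouped or not, until next() returns NULL:
+//   NodeGroupIterator NGI(NI);
+//   while (NodeInfo *Member = NGI.next()) {
+//     ... visit Member ...
+//   }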
+//===--------------------------------------------------------------------===//
+
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeGroupOpIterator - Iterates over all the operands of a node.  If the
+/// node is a member of a group, this iterates over all the operands of all
+/// the members of the group.
+///
+class NodeGroupOpIterator {
+private:
+  NodeInfo            *NI;              // Node containing operands
+  NodeGroupIterator   GI;               // Node group iterator
+  SDNode::op_iterator OI;               // Operand iterator
+  SDNode::op_iterator OE;               // Operand iterator end
+
+  /// CheckNode - Test if the node has more operands.  If not, get the next
+  /// node, skipping over nodes that have no operands.
+  void CheckNode() {
+    // Only if operands are exhausted first
+    while (OI == OE) {
+      // Get next node info
+      NodeInfo *NextNI = GI.next();
+      // Exit if nodes are exhausted
+      if (!NextNI) return;
+      // Get node itself
+      SDNode *Node = NextNI->Node;
+      // Set up the operand iterators
+      OI = Node->op_begin();
+      OE = Node->op_end();
+    }
+  }
+
+public:
+  // Ctor.
+  NodeGroupOpIterator(NodeInfo *N)
+    : NI(N), GI(N), OI(SDNode::op_iterator()), OE(SDNode::op_iterator()) {}
+
+  /// isEnd - Returns true when no more operands are available.
+  ///
+  inline bool isEnd() { CheckNode(); return OI == OE; }
+
+  /// next - Returns the next available operand.
+  ///
+  inline SDOperand next() {
+    assert(OI != OE &&
+           "Not checking for end of NodeGroupOpIterator correctly");
+    return *OI++;
+  }
+};
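+// Usage sketch (illustrative): the isEnd()/next() pair is meant to be used as
+// a guarded loop over every operand of every group member:
+//   NodeGroupOpIterator NGOI(NI);
+//   while (!NGOI.isEnd()) { SDOperand Op = NGOI.next(); ... }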
+
+
+//===----------------------------------------------------------------------===//
+///
+/// BitsIterator - Provides iteration through individual bits in a bit vector.
+///
+template<class T>
+class BitsIterator {
+private:
+  T Bits;                               // Bits left to iterate through
+
+public:
+  /// Ctor.
+  BitsIterator(T Initial) : Bits(Initial) {}
+  
+  /// Next - Returns the next bit set or zero if exhausted.
+  inline T Next() {
+    // Get the rightmost bit set
+    T Result = Bits & -Bits;
+    // Remove from rest
+    Bits &= ~Result;
+    // Return single bit or zero
+    return Result;
+  }
+};
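+// Example (illustrative): for Bits = 0x6, successive Next() calls return 0x2,
+// then 0x4, then 0.  Bits & -Bits isolates the lowest set bit because two's
+// complement negation flips every bit above the lowest set bit.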
+  
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+///
+/// ResourceTally - Manages the use of resources over time intervals.  Each
+/// item (slot) in the tally vector represents the resources used at a given
+/// moment.  A bit set to 1 indicates that a resource is in use, otherwise
+/// available.  An assumption is made that the tally is large enough to schedule
+/// all current instructions (asserts otherwise).
+///
+template<class T>
+class ResourceTally {
+private:
+  std::vector<T> Tally;                 // Resources used per slot
+  typedef typename std::vector<T>::iterator Iter;
+                                        // Tally iterator 
+  
+  /// SlotsAvailable - Returns true if all units are available.
+  ///
+  bool SlotsAvailable(Iter Begin, unsigned N, unsigned ResourceSet,
+                      unsigned &Resource) {
+    assert(N && "Must check availability with N != 0");
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+    
+    // Iterate through each resource
+    BitsIterator<T> Resources(ResourceSet & ~*Begin);
+    while (unsigned Res = Resources.Next()) {
+      // Check if resource is available for next N slots
+      Iter Interval = End;
+      do {
+        Interval--;
+        if (*Interval & Res) break;
+      } while (Interval != Begin);
+      
+      // If available for N
+      if (Interval == Begin) {
+        // Success
+        Resource = Res;
+        return true;
+      }
+    }
+    
+    // No luck
+    Resource = 0;
+    return false;
+  }
+  
+  /// RetrySlot - Finds a good candidate slot to retry search.
+  Iter RetrySlot(Iter Begin, unsigned N, unsigned ResourceSet) {
+    assert(N && "Must check availability with N != 0");
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+    
+    while (Begin != End--) {
+      // Clear units in use
+      ResourceSet &= ~*End;
+      // If no units left then we should go no further 
+      if (!ResourceSet) return End + 1;
+    }
+    // Made it all the way through
+    return Begin;
+  }
+  
+  /// FindAndReserveStages - Return true if the stages can be completed. If
+  /// so mark as busy.
+  bool FindAndReserveStages(Iter Begin,
+                            InstrStage *Stage, InstrStage *StageEnd) {
+    // If at last stage then we're done
+    if (Stage == StageEnd) return true;
+    // Get number of cycles for current stage
+    unsigned N = Stage->Cycles;
+    // Check to see if N slots are available, if not fail
+    unsigned Resource;
+    if (!SlotsAvailable(Begin, N, Stage->Units, Resource)) return false;
+    // Check to see if remaining stages are available, if not fail
+    if (!FindAndReserveStages(Begin + N, Stage + 1, StageEnd)) return false;
+    // Reserve resource
+    Reserve(Begin, N, Resource);
+    // Success
+    return true;
+  }
+
+  /// Reserve - Mark busy (set) the specified N slots.
+  void Reserve(Iter Begin, unsigned N, unsigned Resource) {
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+ 
+    // Set resource bit in each slot
+    for (; Begin < End; Begin++)
+      *Begin |= Resource;
+  }
+
+  /// FindSlots - Starting from Begin, locate consecutive slots where all
+  /// stages can be completed.  Returns the address of the first slot.
+  Iter FindSlots(Iter Begin, InstrStage *StageBegin, InstrStage *StageEnd) {
+    // Track position      
+    Iter Cursor = Begin;
+    
+    // Try all possible slots forward
+    while (true) {
+      // Try at cursor, if successful return position.
+      if (FindAndReserveStages(Cursor, StageBegin, StageEnd)) return Cursor;
+      // Locate a better position
+      Cursor = RetrySlot(Cursor + 1, StageBegin->Cycles, StageBegin->Units);
+    }
+  }
+  
+public:
+  /// Initialize - Resize and zero the tally to the specified number of time
+  /// slots.
+  inline void Initialize(unsigned N) {
+    Tally.assign(N, 0);   // Initialize tally to all zeros.
+  }
+
+  /// FindAndReserve - Locate an ideal slot for the specified stages and mark
+  /// as busy.
+  unsigned FindAndReserve(unsigned Slot, InstrStage *StageBegin,
+                          InstrStage *StageEnd) {
+    // Where to begin 
+    Iter Begin = Tally.begin() + Slot;
+    // Find a free slot
+    Iter Where = FindSlots(Begin, StageBegin, StageEnd);
+    // Distance is slot number
+    unsigned Final = Where - Tally.begin();
+    return Final;
+  }
+
+};
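+// Usage sketch (illustrative): size the tally to the total latency once, then
+// ask for the earliest slot at or after Slot where an itinerary fits:
+//   Tally.Initialize(NSlots);
+//   Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);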
+
+//===----------------------------------------------------------------------===//
+///
+/// ScheduleDAGSimple - Simple two pass scheduler.
+///
+class VISIBILITY_HIDDEN ScheduleDAGSimple : public ScheduleDAG {
+private:
+  bool NoSched;                         // Just do a BFS schedule, nothing fancy
+  bool NoItins;                         // Don't use itineraries?
+  ResourceTally<unsigned> Tally;        // Resource usage tally
+  unsigned NSlots;                      // Total latency
+  static const unsigned NotFound = ~0U; // Search marker
+
+  unsigned NodeCount;                   // Number of nodes in DAG
+  std::map<SDNode *, NodeInfo *> Map;   // Map nodes to info
+  bool HasGroups;                       // True if there are any groups
+  NodeInfo *Info;                       // Info for nodes being scheduled
+  NIVector Ordering;                    // Emit ordering of nodes
+  NodeGroup *HeadNG, *TailNG;           // Keep track of allocated NodeGroups
+  
+public:
+
+  // Ctor.
+  ScheduleDAGSimple(bool noSched, bool noItins, SelectionDAG &dag,
+                    MachineBasicBlock *bb, const TargetMachine &tm)
+    : ScheduleDAG(dag, bb, tm), NoSched(noSched), NoItins(noItins), NSlots(0),
+    NodeCount(0), HasGroups(false), Info(NULL), HeadNG(NULL), TailNG(NULL) {
+    assert(&TII && "Target doesn't provide instr info?");
+    assert(&MRI && "Target doesn't provide register info?");
+  }
+
+  virtual ~ScheduleDAGSimple() {
+    if (Info)
+      delete[] Info;
+    
+    NodeGroup *NG = HeadNG;
+    while (NG) {
+      NodeGroup *NextNG = NG->Next;
+      delete NG;
+      NG = NextNG;
+    }
+  }
+
+  void Schedule();
+
+  /// getNI - Returns the node info for the specified node.
+  ///
+  NodeInfo *getNI(SDNode *Node) { return Map[Node]; }
+  
+private:
+  static bool isDefiner(NodeInfo *A, NodeInfo *B);
+  void IncludeNode(NodeInfo *NI);
+  void VisitAll();
+  void GatherSchedulingInfo();
+  void FakeGroupDominators(); 
+  bool isStrongDependency(NodeInfo *A, NodeInfo *B);
+  bool isWeakDependency(NodeInfo *A, NodeInfo *B);
+  void ScheduleBackward();
+  void ScheduleForward();
+  
+  void AddToGroup(NodeInfo *D, NodeInfo *U);
+  /// PrepareNodeInfo - Set up the basic minimum node info for scheduling.
+  /// 
+  void PrepareNodeInfo();
+  
+  /// IdentifyGroups - Put flagged nodes into groups.
+  ///
+  void IdentifyGroups();
+  
+  /// print - Print ordering to specified output stream.
+  ///
+  void print(std::ostream &O) const;
+  void print(std::ostream *O) const { if (O) print(*O); }
+  
+  void dump(const char *tag) const;
+  
+  virtual void dump() const;
+  
+  /// EmitAll - Emit all nodes in schedule sorted order.
+  ///
+  void EmitAll();
+
+  /// printNI - Print node info.
+  ///
+  void printNI(std::ostream &O, NodeInfo *NI) const;
+  void printNI(std::ostream *O, NodeInfo *NI) const { if (O) printNI(*O, NI); }
+  
+  /// printChanges - Highlight changes in order caused by scheduling.
+  ///
+  void printChanges(unsigned Index) const;
+};
+
+//===----------------------------------------------------------------------===//
+/// Special case itineraries.
+///
+enum {
+  CallLatency = 40,          // To push calls back in time
+
+  RSInteger   = 0xC0000000,  // Two integer units
+  RSFloat     = 0x30000000,  // Two float units
+  RSLoadStore = 0x0C000000,  // Two load store units
+  RSBranch    = 0x02000000   // One branch unit
+};
+static InstrStage LoadStage  = { 5, RSLoadStore };
+static InstrStage StoreStage = { 2, RSLoadStore };
+static InstrStage IntStage   = { 2, RSInteger };
+static InstrStage FloatStage = { 3, RSFloat };
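+// Reading the masks (illustrative): RSInteger (0xC0000000) sets bits 31-30,
+// one bit per integer unit, so two integer instructions can occupy the same
+// slot before SlotsAvailable reports no free unit.  IntStage therefore means
+// "hold one of the two integer units for 2 cycles".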
+//===----------------------------------------------------------------------===//
+
+} // namespace
+
+//===----------------------------------------------------------------------===//
+
+/// PrepareNodeInfo - Set up the basic minimum node info for scheduling.
+/// 
+void ScheduleDAGSimple::PrepareNodeInfo() {
+  // Allocate node information
+  Info = new NodeInfo[NodeCount];
+  
+  unsigned i = 0;
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I, ++i) {
+    // Fast reference to node schedule info
+    NodeInfo* NI = &Info[i];
+    // Set up map
+    Map[I] = NI;
+    // Set node
+    NI->Node = I;
+    // Set pending visit count
+    NI->setPending(I->use_size());
+  }
+}
+
+/// IdentifyGroups - Put flagged nodes into groups.
+///
+void ScheduleDAGSimple::IdentifyGroups() {
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    NodeInfo* NI = &Info[i];
+    SDNode *Node = NI->Node;
+    
+    // For each operand (in reverse, since flag operands are always last)
+    for (unsigned M = Node->getNumOperands(); 0 < M--;) {
+      // Get operand
+      SDOperand Op = Node->getOperand(M);
+      // No more flags to walk
+      if (Op.getValueType() != MVT::Flag) break;
+      // Add to node group
+      AddToGroup(getNI(Op.Val), NI);
+      // Let everyone else know
+      HasGroups = true;
+    }
+  }
+}
+
+/// CountInternalUses - Returns the number of operand edges from U to D.
+///
+static unsigned CountInternalUses(NodeInfo *D, NodeInfo *U) {
+  unsigned N = 0;
+  for (unsigned M = U->Node->getNumOperands(); 0 < M--;) {
+    SDOperand Op = U->Node->getOperand(M);
+    if (Op.Val == D->Node) N++;
+  }
+  
+  return N;
+}
+
+//===----------------------------------------------------------------------===//
+/// AddToGroup - Adds a definer and user pair to a node group.
+///
+void ScheduleDAGSimple::AddToGroup(NodeInfo *D, NodeInfo *U) {
+  // Get current groups
+  NodeGroup *DGroup = D->Group;
+  NodeGroup *UGroup = U->Group;
+  // If both are members of groups
+  if (DGroup && UGroup) {
+    // There may have been another edge connecting these groups already
+    if (DGroup == UGroup) return;
+    // Add the pending users count
+    DGroup->addPending(UGroup->getPending());
+    // For each member of the users group
+    NodeGroupIterator UNGI(U);
+    while (NodeInfo *UNI = UNGI.next() ) {
+      // Change the group
+      UNI->Group = DGroup;
+      // For each member of the definers group
+      NodeGroupIterator DNGI(D);
+      while (NodeInfo *DNI = DNGI.next() ) {
+        // Remove internal edges
+        DGroup->addPending(-CountInternalUses(DNI, UNI));
+      }
+    }
+    // Merge the two lists
+    DGroup->group_insert(DGroup->group_end(),
+                         UGroup->group_begin(), UGroup->group_end());
+  } else if (DGroup) {
+    // Make user member of definers group
+    U->Group = DGroup;
+    // Add the user's uses to the definer's group pending count
+    DGroup->addPending(U->Node->use_size());
+    // For each member of the definers group
+    NodeGroupIterator DNGI(D);
+    while (NodeInfo *DNI = DNGI.next() ) {
+      // Remove internal edges
+      DGroup->addPending(-CountInternalUses(DNI, U));
+    }
+    DGroup->group_push_back(U);
+  } else if (UGroup) {
+    // Make definer member of users group
+    D->Group = UGroup;
+    // Add the definer's uses to the user's group pending count
+    UGroup->addPending(D->Node->use_size());
+    // For each member of the users group
+    NodeGroupIterator UNGI(U);
+    while (NodeInfo *UNI = UNGI.next() ) {
+      // Remove internal edges
+      UGroup->addPending(-CountInternalUses(D, UNI));
+    }
+    UGroup->group_insert(UGroup->group_begin(), D);
+  } else {
+    D->Group = U->Group = DGroup = new NodeGroup();
+    DGroup->addPending(D->Node->use_size() + U->Node->use_size() -
+                       CountInternalUses(D, U));
+    DGroup->group_push_back(D);
+    DGroup->group_push_back(U);
+    
+    if (HeadNG == NULL)
+      HeadNG = DGroup;
+    if (TailNG != NULL)
+      TailNG->Next = DGroup;
+    TailNG = DGroup;
+  }
+}
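+// Pending arithmetic example (illustrative): if D has 3 uses and U has 2, and
+// exactly one of those edges runs from D to U, the fresh group's pending
+// count is 3 + 2 - 1 = 4, so only edges leaving the group keep it waiting
+// when IncludeNode counts down visits.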
+
+
+/// print - Print ordering to specified output stream.
+///
+void ScheduleDAGSimple::print(std::ostream &O) const {
+#ifndef NDEBUG
+  O << "Ordering\n";
+  for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    printNI(O, NI);
+    O << "\n";
+    if (NI->isGroupDominator()) {
+      NodeGroup *Group = NI->Group;
+      for (NIIterator NII = Group->group_begin(), E = Group->group_end();
+           NII != E; NII++) {
+        O << "    ";
+        printNI(O, *NII);
+        O << "\n";
+      }
+    }
+  }
+#endif
+}
+
+void ScheduleDAGSimple::dump(const char *tag) const {
+  cerr << tag; dump();
+}
+
+void ScheduleDAGSimple::dump() const {
+  print(cerr);
+}
+
+
+/// EmitAll - Emit all nodes in schedule sorted order.
+///
+void ScheduleDAGSimple::EmitAll() {
+  // If this is the first basic block in the function, and if it has live ins
+  // that need to be copied into vregs, emit the copies into the top of the
+  // block before emitting the code for the block.
+  MachineFunction &MF = DAG.getMachineFunction();
+  if (&MF.front() == BB && MF.livein_begin() != MF.livein_end()) {
+    for (MachineFunction::livein_iterator LI = MF.livein_begin(),
+         E = MF.livein_end(); LI != E; ++LI)
+      if (LI->second)
+        MRI->copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
+                          LI->first, RegMap->getRegClass(LI->second));
+  }
+  
+  DenseMap<SDOperand, unsigned> VRBaseMap;
+  
+  // For each node in the ordering
+  for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+    // Get the scheduling info
+    NodeInfo *NI = Ordering[i];
+    if (NI->isInGroup()) {
+      NodeGroupIterator NGI(Ordering[i]);
+      while (NodeInfo *GNI = NGI.next()) EmitNode(GNI->Node, VRBaseMap);
+    } else {
+      EmitNode(NI->Node, VRBaseMap);
+    }
+  }
+}
+
+/// isFlagDefiner - Returns true if the node defines a flag result.
+static bool isFlagDefiner(SDNode *A) {
+  unsigned N = A->getNumValues();
+  return N && A->getValueType(N - 1) == MVT::Flag;
+}
+
+/// isFlagUser - Returns true if the node uses a flag result.
+///
+static bool isFlagUser(SDNode *A) {
+  unsigned N = A->getNumOperands();
+  return N && A->getOperand(N - 1).getValueType() == MVT::Flag;
+}
+
+/// printNI - Print node info.
+///
+void ScheduleDAGSimple::printNI(std::ostream &O, NodeInfo *NI) const {
+#ifndef NDEBUG
+  SDNode *Node = NI->Node;
+  O << " "
+    << std::hex << Node << std::dec
+    << ", Lat=" << NI->Latency
+    << ", Slot=" << NI->Slot
+    << ", ARITY=(" << Node->getNumOperands() << ","
+    << Node->getNumValues() << ")"
+    << " " << Node->getOperationName(&DAG);
+  if (isFlagDefiner(Node)) O << "<#";
+  if (isFlagUser(Node)) O << ">#";
+#endif
+}
+
+/// printChanges - Highlight changes in order caused by scheduling.
+///
+void ScheduleDAGSimple::printChanges(unsigned Index) const {
+#ifndef NDEBUG
+  // Get the ordered node count
+  unsigned N = Ordering.size();
+  // Determine if any changes
+  unsigned i = 0;
+  for (; i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    if (NI->Preorder != i) break;
+  }
+  
+  if (i < N) {
+    cerr << Index << ". New Ordering\n";
+    
+    for (i = 0; i < N; i++) {
+      NodeInfo *NI = Ordering[i];
+      cerr << "  " << NI->Preorder << ". ";
+      printNI(cerr, NI);
+      cerr << "\n";
+      if (NI->isGroupDominator()) {
+        NodeGroup *Group = NI->Group;
+        for (NIIterator NII = Group->group_begin(), E = Group->group_end();
+             NII != E; NII++) {
+          cerr << "          ";
+          printNI(cerr, *NII);
+          cerr << "\n";
+        }
+      }
+    }
+  } else {
+    cerr << Index << ". No Changes\n";
+  }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+/// isDefiner - Return true if node A is a definer for B.
+///
+bool ScheduleDAGSimple::isDefiner(NodeInfo *A, NodeInfo *B) {
+  // While there are A nodes
+  NodeGroupIterator NII(A);
+  while (NodeInfo *NI = NII.next()) {
+    // Extract node
+    SDNode *Node = NI->Node;
+    // While there are operands in nodes of B
+    NodeGroupOpIterator NGOI(B);
+    while (!NGOI.isEnd()) {
+      SDOperand Op = NGOI.next();
+      // If node from A defines a node in B
+      if (Node == Op.Val) return true;
+    }
+  }
+  return false;
+}
+
+/// IncludeNode - Add the node to the ordering once all of its users have
+/// been visited.
+void ScheduleDAGSimple::IncludeNode(NodeInfo *NI) {
+  // Get node
+  SDNode *Node = NI->Node;
+  // Ignore entry node
+  if (Node->getOpcode() == ISD::EntryToken) return;
+  // Check current count for node
+  int Count = NI->getPending();
+  // If the node is already in list
+  if (Count < 0) return;
+  // Decrement count to indicate a visit
+  Count--;
+  // If count has gone to zero then add node to list
+  if (!Count) {
+    // Add node
+    if (NI->isInGroup()) {
+      Ordering.push_back(NI->Group->getDominator());
+    } else {
+      Ordering.push_back(NI);
+    }
+    // Indicate node has been added
+    Count--;
+  }
+  // Mark as visited with new count 
+  NI->setPending(Count);
+}
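+// Count encoding example (illustrative): a node with two users starts with
+// Pending = 2; the first visit leaves 1, the second reaches 0, the node is
+// appended to Ordering, and Pending is decremented once more to -1 so any
+// later visit sees "already added" and returns early.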
+
+/// GatherSchedulingInfo - Get latency and resource information about each node.
+///
+void ScheduleDAGSimple::GatherSchedulingInfo() {
+  // Get instruction itineraries for the target
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  
+  // For each node
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    // Get node info
+    NodeInfo* NI = &Info[i];
+    SDNode *Node = NI->Node;
+    
+    // If we have no itineraries (or were told not to use them) fall back on
+    // the hard coded stages for machine instructions
+    if (InstrItins.isEmpty() || NoItins) {
+      // If machine opcode
+      if (Node->isTargetOpcode()) {
+        // Get return type to guess which processing unit 
+        MVT::ValueType VT = Node->getValueType(0);
+        // Get machine opcode
+        MachineOpCode TOpc = Node->getTargetOpcode();
+        NI->IsCall = TII->isCall(TOpc);
+        NI->IsLoad = TII->isLoad(TOpc);
+        NI->IsStore = TII->isStore(TOpc);
+
+        if (TII->isLoad(TOpc))             NI->StageBegin = &LoadStage;
+        else if (TII->isStore(TOpc))       NI->StageBegin = &StoreStage;
+        else if (MVT::isInteger(VT))       NI->StageBegin = &IntStage;
+        else if (MVT::isFloatingPoint(VT)) NI->StageBegin = &FloatStage;
+        if (NI->StageBegin) NI->StageEnd = NI->StageBegin + 1;
+      }
+    } else if (Node->isTargetOpcode()) {
+      // Get machine opcode
+      MachineOpCode TOpc = Node->getTargetOpcode();
+      // Check to see if it is a call
+      NI->IsCall = TII->isCall(TOpc);
+      // Get itinerary stages for instruction
+      unsigned II = TII->getSchedClass(TOpc);
+      NI->StageBegin = InstrItins.begin(II);
+      NI->StageEnd = InstrItins.end(II);
+    }
+    
+    // One slot for the instruction itself
+    NI->Latency = 1;
+    
+    // Add long latency for a call to push it back in time
+    if (NI->IsCall) NI->Latency += CallLatency;
+    
+    // Sum up all the latencies
+    for (InstrStage *Stage = NI->StageBegin, *E = NI->StageEnd;
+        Stage != E; Stage++) {
+      NI->Latency += Stage->Cycles;
+    }
+    
+    // Sum up all the latencies for max tally size
+    NSlots += NI->Latency;
+  }
+  
+  // Unify metrics if in a group
+  if (HasGroups) {
+    for (unsigned i = 0, N = NodeCount; i < N; i++) {
+      NodeInfo* NI = &Info[i];
+      
+      if (NI->isInGroup()) {
+        NodeGroup *Group = NI->Group;
+        
+        if (!Group->getDominator()) {
+          NIIterator NGI = Group->group_begin(), NGE = Group->group_end();
+          NodeInfo *Dominator = *NGI;
+          unsigned Latency = 0;
+          
+          for (NGI++; NGI != NGE; NGI++) {
+            NodeInfo* NGNI = *NGI;
+            Latency += NGNI->Latency;
+            if (Dominator->Latency < NGNI->Latency) Dominator = NGNI;
+          }
+          
+          Dominator->Latency = Latency;
+          Group->setDominator(Dominator);
+        }
+      }
+    }
+  }
+}
+
+/// VisitAll - Visit each node breadth-wise to produce an initial ordering.
+/// Note that the ordering is built in reverse and flipped at the end.
+void ScheduleDAGSimple::VisitAll() {
+  // Add first element to list
+  NodeInfo *NI = getNI(DAG.getRoot().Val);
+  if (NI->isInGroup()) {
+    Ordering.push_back(NI->Group->getDominator());
+  } else {
+    Ordering.push_back(NI);
+  }
+
+  // Iterate through all nodes that have been added
+  for (unsigned i = 0; i < Ordering.size(); i++) { // note: size() varies
+    // Visit all operands
+    NodeGroupOpIterator NGI(Ordering[i]);
+    while (!NGI.isEnd()) {
+      // Get next operand
+      SDOperand Op = NGI.next();
+      // Get node
+      SDNode *Node = Op.Val;
+      // Ignore passive nodes
+      if (isPassiveNode(Node)) continue;
+      // Check out node
+      IncludeNode(getNI(Node));
+    }
+  }
+
+  // Add entry node last (IncludeNode filters entry nodes)
+  if (DAG.getEntryNode().Val != DAG.getRoot().Val)
+    Ordering.push_back(getNI(DAG.getEntryNode().Val));
+    
+  // Reverse the order
+  std::reverse(Ordering.begin(), Ordering.end());
+}
+
+/// FakeGroupDominators - Set dominators for non-scheduling.
+/// 
+void ScheduleDAGSimple::FakeGroupDominators() {
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    NodeInfo* NI = &Info[i];
+    
+    if (NI->isInGroup()) {
+      NodeGroup *Group = NI->Group;
+      
+      if (!Group->getDominator()) {
+        Group->setDominator(NI);
+      }
+    }
+  }
+}
+
+/// isStrongDependency - Return true if node A has results used by node B;
+/// i.e., B must wait for the latency of A.
+bool ScheduleDAGSimple::isStrongDependency(NodeInfo *A, NodeInfo *B) {
+  // If A defines a value used by B then it's a strong dependency, or
+  // if a load follows a store (they may be dependent -- why take a chance?)
+  return isDefiner(A, B) || (A->IsStore && B->IsLoad);
+}
+
+/// isWeakDependency - Return true if node A produces a result that will
+/// conflict with operands of B.  It is assumed that isStrongDependency has
+/// already been checked.
+bool ScheduleDAGSimple::isWeakDependency(NodeInfo *A, NodeInfo *B) {
+  // TODO check for conflicting real registers and aliases
+#if 0 // FIXME - Since we are in SSA form and not checking register aliasing
+  return A->Node->getOpcode() == ISD::EntryToken || isStrongDependency(B, A);
+#else
+  return A->Node->getOpcode() == ISD::EntryToken;
+#endif
+}
+
+/// ScheduleBackward - Schedule instructions so that any long latency
+/// instructions and the critical path get pushed back in time. Time is run in
+/// reverse to allow code reuse of the Tally and to eliminate the overhead of
+/// biasing every slot index against NSlots.
+void ScheduleDAGSimple::ScheduleBackward() {
+  // Size and clear the resource tally
+  Tally.Initialize(NSlots);
+  // Get number of nodes to schedule
+  unsigned N = Ordering.size();
+  
+  // For each node being scheduled
+  for (unsigned i = N; 0 < i--;) {
+    NodeInfo *NI = Ordering[i];
+    // Track insertion
+    unsigned Slot = NotFound;
+    
+    // Compare against those previously scheduled nodes
+    unsigned j = i + 1;
+    for (; j < N; j++) {
+      // Get following instruction
+      NodeInfo *Other = Ordering[j];
+      
+      // Check dependency against previously inserted nodes
+      if (isStrongDependency(NI, Other)) {
+        Slot = Other->Slot + Other->Latency;
+        break;
+      } else if (isWeakDependency(NI, Other)) {
+        Slot = Other->Slot;
+        break;
+      }
+    }
+    
+    // If independent of others (or first entry)
+    if (Slot == NotFound) Slot = 0;
+    
+#if 0 // FIXME - measure later
+    // Find a slot where the needed resources are available
+    if (NI->StageBegin != NI->StageEnd)
+      Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+#endif
+      
+    // Set node slot
+    NI->Slot = Slot;
+    
+    // Insert sort based on slot
+    j = i + 1;
+    for (; j < N; j++) {
+      // Get following instruction
+      NodeInfo *Other = Ordering[j];
+      // Should we look further (remember slots are in reverse time)
+      if (Slot >= Other->Slot) break;
+      // Shuffle other into ordering
+      Ordering[j - 1] = Other;
+    }
+    // Insert node in proper slot
+    if (j != i + 1) Ordering[j - 1] = NI;
+  }
+}
+
+/// ScheduleForward - Schedule instructions to maximize packing.
+///
+void ScheduleDAGSimple::ScheduleForward() {
+  // Size and clear the resource tally
+  Tally.Initialize(NSlots);
+  // Get number of nodes to schedule
+  unsigned N = Ordering.size();
+  
+  // For each node being scheduled
+  for (unsigned i = 0; i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    // Track insertion
+    unsigned Slot = NotFound;
+    
+    // Compare against those previously scheduled nodes
+    unsigned j = i;
+    for (; 0 < j--;) {
+      // Get prior instruction
+      NodeInfo *Other = Ordering[j];
+      
+      // Check dependency against previously inserted nodes
+      if (isStrongDependency(Other, NI)) {
+        Slot = Other->Slot + Other->Latency;
+        break;
+      } else if (Other->IsCall || isWeakDependency(Other, NI)) {
+        Slot = Other->Slot;
+        break;
+      }
+    }
+    
+    // If independent of others (or first entry)
+    if (Slot == NotFound) Slot = 0;
+    
+    // Find a slot where the needed resources are available
+    if (NI->StageBegin != NI->StageEnd)
+      Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+      
+    // Set node slot
+    NI->Slot = Slot;
+    
+    // Insert sort based on slot
+    j = i;
+    for (; 0 < j--;) {
+      // Get prior instruction
+      NodeInfo *Other = Ordering[j];
+      // Should we look further
+      if (Slot >= Other->Slot) break;
+      // Shuffle other into ordering
+      Ordering[j + 1] = Other;
+    }
+    // Insert node in proper slot
+    if (j != i) Ordering[j + 1] = NI;
+  }
+}
+
+/// Schedule - Order nodes according to selected style.
+///
+void ScheduleDAGSimple::Schedule() {
+  // Number the nodes
+  NodeCount = std::distance(DAG.allnodes_begin(), DAG.allnodes_end());
+
+  // Set up minimum info for scheduling
+  PrepareNodeInfo();
+  // Construct node groups for flagged nodes
+  IdentifyGroups();
+  
+  // Test to see if scheduling should occur
+  bool ShouldSchedule = NodeCount > 3 && !NoSched;
+  // Don't waste time if it is only an entry and a return
+  if (ShouldSchedule) {
+    // Get latency and resource requirements
+    GatherSchedulingInfo();
+  } else if (HasGroups) {
+    // Make sure all the groups have dominators
+    FakeGroupDominators();
+  }
+
+  // Breadth first walk of DAG
+  VisitAll();
+
+#ifndef NDEBUG
+  static unsigned Count = 0;
+  Count++;
+  for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    NI->Preorder = i;
+  }
+#endif  
+  
+  // Don't waste time if it is only an entry and a return
+  if (ShouldSchedule) {
+    // Push back long instructions and critical path
+    ScheduleBackward();
+    
+    // Pack instructions to maximize resource utilization
+    ScheduleForward();
+  }
+  
+  DEBUG(printChanges(Count));
+  
+  // Emit in scheduled order
+  EmitAll();
+}
+
+
+/// createSimpleDAGScheduler - This creates a simple two pass instruction
+/// scheduler using instruction itineraries.
+llvm::ScheduleDAG* llvm::createSimpleDAGScheduler(SelectionDAGISel *IS,
+                                                  SelectionDAG *DAG,
+                                                  MachineBasicBlock *BB) {
+  return new ScheduleDAGSimple(false, false, *DAG, BB, DAG->getTarget());
+}
+
+/// createNoItinsDAGScheduler - This creates a simple two pass instruction
+/// scheduler without using instruction itineraries.
+llvm::ScheduleDAG* llvm::createNoItinsDAGScheduler(SelectionDAGISel *IS,
+                                                   SelectionDAG *DAG,
+                                                   MachineBasicBlock *BB) {
+  return new ScheduleDAGSimple(false, true, *DAG, BB, DAG->getTarget());
+}
+
+/// createBFS_DAGScheduler - This creates a simple breadth first instruction
+/// scheduler.
+llvm::ScheduleDAG* llvm::createBFS_DAGScheduler(SelectionDAGISel *IS,
+                                                SelectionDAG *DAG,
+                                                MachineBasicBlock *BB) {
+  return new ScheduleDAGSimple(true, false, *DAG, BB, DAG->getTarget());
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000..9803ab8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,3694 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const MVT::ValueType *VTs, unsigned NumVTs) {
+  SDVTList Res = {VTs, NumVTs};
+  return Res;
+}
+
+//===----------------------------------------------------------------------===//
+//                              ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(double V) const {
+  return DoubleToBits(V) == DoubleToBits(Value);
+}
+
+//===----------------------------------------------------------------------===//
+//                              ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+  // Look through a bit convert.
+  if (N->getOpcode() == ISD::BIT_CONVERT)
+    N = N->getOperand(0).Val;
+  
+  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+  
+  unsigned i = 0, e = N->getNumOperands();
+  
+  // Skip over all of the undef values.
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+  
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+  
+  // Do not accept build_vectors that aren't all constants or which have non-~0
+  // elements.
+  SDOperand NotZero = N->getOperand(i);
+  if (isa<ConstantSDNode>(NotZero)) {
+    if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(NotZero)) {
+    MVT::ValueType VT = NotZero.getValueType();
+    if (VT == MVT::f64) {
+      if (DoubleToBits(cast<ConstantFPSDNode>(NotZero)->getValue()) !=
+          (uint64_t)-1)
+        return false;
+    } else {
+      if (FloatToBits(cast<ConstantFPSDNode>(NotZero)->getValue()) !=
+          (uint32_t)-1)
+        return false;
+    }
+  } else
+    return false;
+  
+  // Okay, we have at least one ~0 value, check to see if the rest match or are
+  // undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != NotZero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+  // Look through a bit convert.
+  if (N->getOpcode() == ISD::BIT_CONVERT)
+    N = N->getOperand(0).Val;
+  
+  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+  
+  unsigned i = 0, e = N->getNumOperands();
+  
+  // Skip over all of the undef values.
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+  
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+  
+  // Do not accept build_vectors that aren't all constants or which have
+  // non-zero elements.
+  SDOperand Zero = N->getOperand(i);
+  if (isa<ConstantSDNode>(Zero)) {
+    if (!cast<ConstantSDNode>(Zero)->isNullValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(Zero)) {
+    if (!cast<ConstantFPSDNode>(Zero)->isExactlyValue(0.0))
+      return false;
+  } else
+    return false;
+  
+  // Okay, we have at least one zero value, check to see if the rest match or
+  // are undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != Zero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+  // To perform this operation, we just need to swap the L and G bits of the
+  // operation.
+  unsigned OldL = (Operation >> 2) & 1;
+  unsigned OldG = (Operation >> 1) & 1;
+  return ISD::CondCode((Operation & ~6) |  // Keep the N, U, E bits
+                       (OldL << 1) |       // New G bit
+                       (OldG << 2));        // New L bit.
+}
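+// Example (illustrative): swapping the operands of (X < Y) gives (Y > X), so
+// SETLT maps to SETGT: the L and G bits trade places while the equality and
+// orderedness bits pass through unchanged.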
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+  unsigned Operation = Op;
+  if (isInteger)
+    Operation ^= 7;   // Flip L, G, E bits, but not U.
+  else
+    Operation ^= 15;  // Flip all of the condition bits.
+  if (Operation > ISD::SETTRUE2)
+    Operation &= ~8;     // Don't let N and U bits get set.
+  return ISD::CondCode(Operation);
+}
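+// Example (illustrative): for an integer compare, !(X < Y) is (X >= Y), so
+// SETLT maps to SETGE -- the ^= 7 clears L while setting G and E.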
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation, 2 if it is an unsigned comparison, and zero if the
+/// operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+  switch (Opcode) {
+  default: assert(0 && "Illegal integer setcc operation!");
+  case ISD::SETEQ:
+  case ISD::SETNE: return 0;
+  case ISD::SETLT:
+  case ISD::SETLE:
+  case ISD::SETGT:
+  case ISD::SETGE: return 1;
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUGT:
+  case ISD::SETUGE: return 2;
+  }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)).  This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                       bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed integer setcc with an unsigned integer setcc.
+    return ISD::SETCC_INVALID;
+
+  unsigned Op = Op1 | Op2;  // Combine all of the condition bits.
+
+  // If the N and U bits get set then the resultant comparison DOES suddenly
+  // care about orderedness, and is true when ordered.
+  if (Op > ISD::SETTRUE2)
+    Op &= ~16;     // Clear the U bit if the N bit is set.
+  
+  // Canonicalize illegal integer setcc's.
+  if (isInteger && Op == ISD::SETUNE)  // e.g. SETUGT | SETULT
+    Op = ISD::SETNE;
+  
+  return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)).  This
+/// function returns SETCC_INVALID if it is not possible to represent the
+/// resultant comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                        bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed setcc with an unsigned setcc.
+    return ISD::SETCC_INVALID;
+
+  // Combine all of the condition bits.
+  ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+  
+  // Canonicalize illegal integer setcc's.
+  if (isInteger) {
+    switch (Result) {
+    default: break;
+    case ISD::SETUO : Result = ISD::SETFALSE; break;  // SETUGT & SETULT
+    case ISD::SETUEQ: Result = ISD::SETEQ   ; break;  // SETUGE & SETULE
+    case ISD::SETOLT: Result = ISD::SETULT  ; break;  // SETULT & SETNE
+    case ISD::SETOGT: Result = ISD::SETUGT  ; break;  // SETUGT & SETNE
+    }
+  }
+  
+  return Result;
+}
+
+const TargetMachine &SelectionDAG::getTarget() const {
+  return TLI.getTargetMachine();
+}
+
+//===----------------------------------------------------------------------===//
+//                           SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC)  {
+  ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent
+/// them solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+  ID.AddPointer(VTList.VTs);  
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+                              const SDOperand *Ops, unsigned NumOps) {
+  for (; NumOps; --NumOps, ++Ops) {
+    ID.AddPointer(Ops->Val);
+    ID.AddInteger(Ops->ResNo);
+  }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+                          unsigned short OpC, SDVTList VTList, 
+                          const SDOperand *OpList, unsigned N) {
+  AddNodeIDOpcode(ID, OpC);
+  AddNodeIDValueTypes(ID, VTList);
+  AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, SDNode *N) {
+  AddNodeIDOpcode(ID, N->getOpcode());
+  // Add the return value info.
+  AddNodeIDValueTypes(ID, N->getVTList());
+  // Add the operand info.
+  AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+  switch (N->getOpcode()) {
+  default: break;  // Normal nodes don't need extra info.
+  case ISD::TargetConstant:
+  case ISD::Constant:
+    ID.AddInteger(cast<ConstantSDNode>(N)->getValue());
+    break;
+  case ISD::TargetConstantFP:
+  case ISD::ConstantFP:
+    ID.AddDouble(cast<ConstantFPSDNode>(N)->getValue());
+    break;
+  case ISD::TargetGlobalAddress:
+  case ISD::GlobalAddress:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::GlobalTLSAddress: {
+    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+    ID.AddPointer(GA->getGlobal());
+    ID.AddInteger(GA->getOffset());
+    break;
+  }
+  case ISD::BasicBlock:
+    ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+    break;
+  case ISD::Register:
+    ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+    break;
+  case ISD::SRCVALUE: {
+    SrcValueSDNode *SV = cast<SrcValueSDNode>(N);
+    ID.AddPointer(SV->getValue());
+    ID.AddInteger(SV->getOffset());
+    break;
+  }
+  case ISD::FrameIndex:
+  case ISD::TargetFrameIndex:
+    ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+    break;
+  case ISD::JumpTable:
+  case ISD::TargetJumpTable:
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+    break;
+  case ISD::ConstantPool:
+  case ISD::TargetConstantPool: {
+    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+    ID.AddInteger(CP->getAlignment());
+    ID.AddInteger(CP->getOffset());
+    if (CP->isMachineConstantPoolEntry())
+      CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+    else
+      ID.AddPointer(CP->getConstVal());
+    break;
+  }
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    ID.AddInteger(LD->getAddressingMode());
+    ID.AddInteger(LD->getExtensionType());
+    ID.AddInteger(LD->getLoadedVT());
+    ID.AddPointer(LD->getSrcValue());
+    ID.AddInteger(LD->getSrcValueOffset());
+    ID.AddInteger(LD->getAlignment());
+    ID.AddInteger(LD->isVolatile());
+    break;
+  }
+  case ISD::STORE: {
+    StoreSDNode *ST = cast<StoreSDNode>(N);
+    ID.AddInteger(ST->getAddressingMode());
+    ID.AddInteger(ST->isTruncatingStore());
+    ID.AddInteger(ST->getStoredVT());
+    ID.AddPointer(ST->getSrcValue());
+    ID.AddInteger(ST->getSrcValueOffset());
+    ID.AddInteger(ST->getAlignment());
+    ID.AddInteger(ST->isVolatile());
+    break;
+  }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//                              SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+  // Create a dummy node (which is not added to allnodes) that holds a
+  // reference to the root node, preventing it from being deleted.
+  HandleSDNode Dummy(getRoot());
+
+  SmallVector<SDNode*, 128> DeadNodes;
+  
+  // Add all obviously-dead nodes to the DeadNodes worklist.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+    if (I->use_empty())
+      DeadNodes.push_back(I);
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.back();
+    DeadNodes.pop_back();
+    
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list.  This is safe to do, as there are
+    // no cycles in the graph.
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+      SDNode *Operand = I->Val;
+      Operand->removeUser(N);
+      
+      // Now that we removed this operand, see if there are no uses of it left.
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+    if (N->OperandsNeedDelete)
+      delete[] N->OperandList;
+    N->OperandList = 0;
+    N->NumOperands = 0;
+    
+    // Finally, remove N itself.
+    AllNodes.erase(N);
+  }
+  
+  // If the root changed (e.g. it was a dead load), update the root.
+  setRoot(Dummy.getValue());
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, std::vector<SDNode*> &Deleted) {
+  SmallVector<SDNode*, 16> DeadNodes;
+  DeadNodes.push_back(N);
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.back();
+    DeadNodes.pop_back();
+    
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list.  This is safe to do, as there are
+    // no cycles in the graph.
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+      SDNode *Operand = I->Val;
+      Operand->removeUser(N);
+      
+      // Now that we removed this operand, see if there are no uses of it left.
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+    if (N->OperandsNeedDelete)
+      delete[] N->OperandList;
+    N->OperandList = 0;
+    N->NumOperands = 0;
+    
+    // Finally, remove N itself.
+    Deleted.push_back(N);
+    AllNodes.erase(N);
+  }
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+  assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+  // First take this out of the appropriate CSE map.
+  RemoveNodeFromCSEMaps(N);
+
+  // Finally, remove uses due to operands of this node, remove from the 
+  // AllNodes list, and delete the node.
+  DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+
+  // Remove it from the AllNodes list.
+  AllNodes.remove(N);
+    
+  // Drop all of the operands and decrement used nodes use counts.
+  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+    I->Val->removeUser(N);
+  if (N->OperandsNeedDelete)
+    delete[] N->OperandList;
+  N->OperandList = 0;
+  N->NumOperands = 0;
+  
+  delete N;
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it.  This is useful when we're about to delete or repurpose
+/// the node.  We don't want future requests for structurally identical nodes
+/// to return N anymore.
+void SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+  bool Erased = false;
+  switch (N->getOpcode()) {
+  case ISD::HANDLENODE: return;  // noop.
+  case ISD::STRING:
+    Erased = StringNodes.erase(cast<StringSDNode>(N)->getValue());
+    break;
+  case ISD::CONDCODE:
+    assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+           "Cond code doesn't exist!");
+    Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+    CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+    break;
+  case ISD::ExternalSymbol:
+    Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::TargetExternalSymbol:
+    Erased =
+      TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::VALUETYPE:
+    Erased = ValueTypeNodes[cast<VTSDNode>(N)->getVT()] != 0;
+    ValueTypeNodes[cast<VTSDNode>(N)->getVT()] = 0;
+    break;
+  default:
+    // Remove it from the CSE Map.
+    Erased = CSEMap.RemoveNode(N);
+    break;
+  }
+#ifndef NDEBUG
+  // Verify that the node was actually in one of the CSE maps, unless it has a 
+  // flag result (which cannot be CSE'd) or is one of the special cases that are
+  // not subject to CSE.
+  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+      !N->isTargetOpcode()) {
+    N->dump(this);
+    cerr << "\n";
+    assert(0 && "Node is not in map!");
+  }
+#endif
+}
+
+/// AddNonLeafNodeToCSEMaps - Add the specified node back to the CSE maps.  It
+/// has been taken out and modified in some way.  If the specified node already
+/// exists in the CSE maps, do not modify the maps, but return the existing node
+/// instead.  If it doesn't exist, add it and return null.
+///
+SDNode *SelectionDAG::AddNonLeafNodeToCSEMaps(SDNode *N) {
+  assert(N->getNumOperands() && "This is a leaf node!");
+  if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+    return 0;    // Never add these nodes.
+  
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return 0;   // Never CSE anything that produces a flag.
+  
+  SDNode *New = CSEMap.GetOrInsertNode(N);
+  if (New != N) return New;  // Node already existed.
+  return 0;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified.  If this node is never memoized, 
+/// return null, otherwise return a pointer to the slot it would take.  If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDOperand Op,
+                                           void *&InsertPos) {
+  if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+    return 0;    // Never add these nodes.
+  
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return 0;   // Never CSE anything that produces a flag.
+  
+  SDOperand Ops[] = { Op };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+  return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified.  If this node is never memoized, 
+/// return null, otherwise return a pointer to the slot it would take.  If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, 
+                                           SDOperand Op1, SDOperand Op2,
+                                           void *&InsertPos) {
+  if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+    return 0;    // Never add these nodes.
+  
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return 0;   // Never CSE anything that produces a flag.
+                                              
+  SDOperand Ops[] = { Op1, Op2 };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+  return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified.  If this node is never memoized, 
+/// return null, otherwise return a pointer to the slot it would take.  If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, 
+                                           const SDOperand *Ops,unsigned NumOps,
+                                           void *&InsertPos) {
+  if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+    return 0;    // Never add these nodes.
+  
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return 0;   // Never CSE anything that produces a flag.
+  
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+  
+  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    ID.AddInteger(LD->getAddressingMode());
+    ID.AddInteger(LD->getExtensionType());
+    ID.AddInteger(LD->getLoadedVT());
+    ID.AddPointer(LD->getSrcValue());
+    ID.AddInteger(LD->getSrcValueOffset());
+    ID.AddInteger(LD->getAlignment());
+    ID.AddInteger(LD->isVolatile());
+  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    ID.AddInteger(ST->getAddressingMode());
+    ID.AddInteger(ST->isTruncatingStore());
+    ID.AddInteger(ST->getStoredVT());
+    ID.AddPointer(ST->getSrcValue());
+    ID.AddInteger(ST->getSrcValueOffset());
+    ID.AddInteger(ST->getAlignment());
+    ID.AddInteger(ST->isVolatile());
+  }
+  
+  return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+
+SelectionDAG::~SelectionDAG() {
+  while (!AllNodes.empty()) {
+    SDNode *N = AllNodes.begin();
+    N->SetNextInBucket(0);
+    if (N->OperandsNeedDelete)
+      delete [] N->OperandList;
+    N->OperandList = 0;
+    N->NumOperands = 0;
+    AllNodes.pop_front();
+  }
+}
+
+SDOperand SelectionDAG::getZeroExtendInReg(SDOperand Op, MVT::ValueType VT) {
+  if (Op.getValueType() == VT) return Op;
+  int64_t Imm = ~0ULL >> (64-MVT::getSizeInBits(VT));
+  return getNode(ISD::AND, Op.getValueType(), Op,
+                 getConstant(Imm, Op.getValueType()));
+}
+
+SDOperand SelectionDAG::getString(const std::string &Val) {
+  StringSDNode *&N = StringNodes[Val];
+  if (!N) {
+    N = new StringSDNode(Val);
+    AllNodes.push_back(N);
+  }
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getConstant(uint64_t Val, MVT::ValueType VT, bool isT) {
+  assert(MVT::isInteger(VT) && "Cannot create FP integer constant!");
+  assert(!MVT::isVector(VT) && "Cannot create Vector ConstantSDNodes!");
+  
+  // Mask out any bits that are not valid for this constant.
+  Val &= MVT::getIntVTBitMask(VT);
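+  // E.g. getConstant(0x1FF, MVT::i8) and getConstant(0xFF, MVT::i8) both
+  // store 0xFF, so they CSE to the same node.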
+
+  unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Val);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new ConstantSDNode(isT, Val, VT);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+
+SDOperand SelectionDAG::getConstantFP(double Val, MVT::ValueType VT,
+                                      bool isTarget) {
+  assert(MVT::isFloatingPoint(VT) && "Cannot create integer FP constant!");
+  MVT::ValueType EltVT =
+    MVT::isVector(VT) ? MVT::getVectorElementType(VT) : VT;
+  if (EltVT == MVT::f32)
+    Val = (float)Val;  // Mask out extra precision.
+
+  // Do the map lookup using the actual bit pattern for the floating point
+  // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+  // we don't have issues with SNANs.
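+  // (0.0 and -0.0 compare equal as doubles but have distinct bit patterns,
+  // 0x0 and 0x8000000000000000, so keying on the bits keeps them separate.)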
+  unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+  ID.AddDouble(Val);
+  void *IP = 0;
+  SDNode *N = NULL;
+  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+    if (!MVT::isVector(VT))
+      return SDOperand(N, 0);
+  if (!N) {
+    N = new ConstantFPSDNode(isTarget, Val, EltVT);
+    CSEMap.InsertNode(N, IP);
+    AllNodes.push_back(N);
+  }
+
+  SDOperand Result(N, 0);
+  if (MVT::isVector(VT)) {
+    SmallVector<SDOperand, 8> Ops;
+    Ops.assign(MVT::getVectorNumElements(VT), Result);
+    Result = getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+  }
+  return Result;
+}
+
+SDOperand SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+                                         MVT::ValueType VT, int Offset,
+                                         bool isTargetGA) {
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+  unsigned Opc;
+  if (GVar && GVar->isThreadLocal())
+    Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+  else
+    Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddPointer(GV);
+  ID.AddInteger(Offset);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+   return SDOperand(E, 0);
+  SDNode *N = new GlobalAddressSDNode(isTargetGA, GV, VT, Offset);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getFrameIndex(int FI, MVT::ValueType VT,
+                                      bool isTarget) {
+  unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(FI);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new FrameIndexSDNode(FI, VT, isTarget);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getJumpTable(int JTI, MVT::ValueType VT, bool isTarget){
+  unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(JTI);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new JumpTableSDNode(JTI, VT, isTarget);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getConstantPool(Constant *C, MVT::ValueType VT,
+                                        unsigned Alignment, int Offset,
+                                        bool isTarget) {
+  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(Offset);
+  ID.AddPointer(C);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+
+SDOperand SelectionDAG::getConstantPool(MachineConstantPoolValue *C,
+                                        MVT::ValueType VT,
+                                        unsigned Alignment, int Offset,
+                                        bool isTarget) {
+  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(Offset);
+  C->AddSelectionDAGCSEId(ID);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+
+SDOperand SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(MBB);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new BasicBlockSDNode(MBB);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getValueType(MVT::ValueType VT) {
+  if ((unsigned)VT >= ValueTypeNodes.size())
+    ValueTypeNodes.resize(VT+1);
+  if (ValueTypeNodes[VT] == 0) {
+    ValueTypeNodes[VT] = new VTSDNode(VT);
+    AllNodes.push_back(ValueTypeNodes[VT]);
+  }
+
+  return SDOperand(ValueTypeNodes[VT], 0);
+}
+
+SDOperand SelectionDAG::getExternalSymbol(const char *Sym, MVT::ValueType VT) {
+  SDNode *&N = ExternalSymbols[Sym];
+  if (N) return SDOperand(N, 0);
+  N = new ExternalSymbolSDNode(false, Sym, VT);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getTargetExternalSymbol(const char *Sym,
+                                                MVT::ValueType VT) {
+  SDNode *&N = TargetExternalSymbols[Sym];
+  if (N) return SDOperand(N, 0);
+  N = new ExternalSymbolSDNode(true, Sym, VT);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getCondCode(ISD::CondCode Cond) {
+  if ((unsigned)Cond >= CondCodeNodes.size())
+    CondCodeNodes.resize(Cond+1);
+  
+  if (CondCodeNodes[Cond] == 0) {
+    CondCodeNodes[Cond] = new CondCodeSDNode(Cond);
+    AllNodes.push_back(CondCodeNodes[Cond]);
+  }
+  return SDOperand(CondCodeNodes[Cond], 0);
+}
+
+SDOperand SelectionDAG::getRegister(unsigned RegNo, MVT::ValueType VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+  ID.AddInteger(RegNo);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new RegisterSDNode(RegNo, VT);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getSrcValue(const Value *V, int Offset) {
+  assert((!V || isa<PointerType>(V->getType())) &&
+         "SrcValue is not a pointer?");
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(V);
+  ID.AddInteger(Offset);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new SrcValueSDNode(V, Offset);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::FoldSetCC(MVT::ValueType VT, SDOperand N1,
+                                  SDOperand N2, ISD::CondCode Cond) {
+  // These setcc operations always fold.
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return getConstant(1, VT);
+    
+  case ISD::SETOEQ:
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETONE:
+  case ISD::SETO:
+  case ISD::SETUO:
+  case ISD::SETUEQ:
+  case ISD::SETUNE:
+    assert(!MVT::isInteger(N1.getValueType()) && "Illegal setcc for integer!");
+    break;
+  }
+  
+  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val)) {
+    uint64_t C2 = N2C->getValue();
+    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val)) {
+      uint64_t C1 = N1C->getValue();
+      
+      // Sign extend the operands if required
+      if (ISD::isSignedIntSetCC(Cond)) {
+        C1 = N1C->getSignExtended();
+        C2 = N2C->getSignExtended();
+      }
+      
+      switch (Cond) {
+      default: assert(0 && "Unknown integer setcc!");
+      case ISD::SETEQ:  return getConstant(C1 == C2, VT);
+      case ISD::SETNE:  return getConstant(C1 != C2, VT);
+      case ISD::SETULT: return getConstant(C1 <  C2, VT);
+      case ISD::SETUGT: return getConstant(C1 >  C2, VT);
+      case ISD::SETULE: return getConstant(C1 <= C2, VT);
+      case ISD::SETUGE: return getConstant(C1 >= C2, VT);
+      case ISD::SETLT:  return getConstant((int64_t)C1 <  (int64_t)C2, VT);
+      case ISD::SETGT:  return getConstant((int64_t)C1 >  (int64_t)C2, VT);
+      case ISD::SETLE:  return getConstant((int64_t)C1 <= (int64_t)C2, VT);
+      case ISD::SETGE:  return getConstant((int64_t)C1 >= (int64_t)C2, VT);
+      }
+    }
+  }
+  if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.Val))
+    if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.Val)) {
+      double C1 = N1C->getValue(), C2 = N2C->getValue();
+      
+      switch (Cond) {
+      default: break; // FIXME: Implement the rest of these!
+      case ISD::SETEQ:  return getConstant(C1 == C2, VT);
+      case ISD::SETNE:  return getConstant(C1 != C2, VT);
+      case ISD::SETLT:  return getConstant(C1 < C2, VT);
+      case ISD::SETGT:  return getConstant(C1 > C2, VT);
+      case ISD::SETLE:  return getConstant(C1 <= C2, VT);
+      case ISD::SETGE:  return getConstant(C1 >= C2, VT);
+      }
+    } else {
+      // Ensure that the constant occurs on the RHS.
+      return getSetCC(VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+    }
+      
+  // Could not fold it.
+  return SDOperand();
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero, i.e.
+/// every bit of Mask is a bit that V is known not to have set.  We use this
+/// predicate to simplify operations downstream.
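+/// For example, if Op is (shl X, 8), MaskedValueIsZero(Op, 0xFF) returns
+/// true: the shift always clears the low eight bits.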
+bool SelectionDAG::MaskedValueIsZero(SDOperand Op, uint64_t Mask, 
+                                     unsigned Depth) const {
+  // The masks are not wide enough to represent this type!  Should use APInt.
+  if (Op.getValueType() == MVT::i128)
+    return false;
+  
+  uint64_t KnownZero, KnownOne;
+  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+  return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets.  This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDOperand Op, uint64_t Mask, 
+                                     uint64_t &KnownZero, uint64_t &KnownOne,
+                                     unsigned Depth) const {
+  KnownZero = KnownOne = 0;   // Don't know anything.
+  if (Depth == 6 || Mask == 0)
+    return;  // Limit search depth.
+  
+  // The masks are not wide enough to represent this type!  Should use APInt.
+  if (Op.getValueType() == MVT::i128)
+    return;
+  
+  uint64_t KnownZero2, KnownOne2;
+
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask;
+    KnownZero = ~KnownOne & Mask;
+    return;
+  case ISD::AND:
+    // If either the LHS or the RHS are Zero, the result is zero.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    Mask &= ~KnownZero;
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 bits are clear if known zero in either the LHS or RHS.
+    KnownZero |= KnownZero2;
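+    // E.g. if the LHS has KnownZero = 0x0F and the RHS has KnownZero = 0xF0,
+    // every bit of the AND result is known zero (0x0F | 0xF0 == 0xFF).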
+    return;
+  case ISD::OR:
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    Mask &= ~KnownOne;
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 bits are set if known set in either the LHS or RHS.
+    KnownOne |= KnownOne2;
+    return;
+  case ISD::XOR: {
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are set if known set in exactly one of the LHS, RHS.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case ISD::SELECT:
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SELECT_CC:
+    ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
+    if (TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
+      KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
+    return;
+  case ISD::SHL:
+    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(),
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero <<= SA->getValue();
+      KnownOne  <<= SA->getValue();
+      KnownZero |= (1ULL << SA->getValue())-1;  // low bits known zero.
+    }
+    return;
+  case ISD::SRL:
+    // (srl X, C1) & C2 == 0   if   (-1 >>u C1) & C2 == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+      ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask,
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= MVT::getSizeInBits(VT)-ShAmt;
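+      // E.g. for an i32 srl by 4, HighBits = 0xF0000000: a logical right
+      // shift always clears the top ShAmt bits.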
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    return;
+  case ISD::SRA:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+
+      // Compute the new bits that are at the top now.
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+
+      uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask;
+      // If any of the demanded bits are produced by the sign extension, we also
+      // demand the input sign bit.
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
+      if (HighBits & Mask)
+        InDemandedMask |= MVT::getIntVTSignBit(VT);
+      
+      ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+                        Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+      
+      // Handle the sign bits.
+      uint64_t SignBit = MVT::getIntVTSignBit(VT);
+      SignBit >>= ShAmt;  // Adjust to where it is now in the mask.
+      
+      if (KnownZero & SignBit) {       
+        KnownZero |= HighBits;  // New bits are known zero.
+      } else if (KnownOne & SignBit) {
+        KnownOne  |= HighBits;  // New bits are known one.
+      }
+    }
+    return;
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    
+    // Sign extension.  Compute the demanded bits in the result that are not 
+    // present in the input.
+    uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask;
+
+    uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
+    uint64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT);
+    
+    // If the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    if (NewBits)
+      InputDemandedBits |= InSignBit;
+    
+    ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+                      KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+    if (KnownZero & InSignBit) {          // Input sign bit known clear
+      KnownZero |= NewBits;
+      KnownOne  &= ~NewBits;
+    } else if (KnownOne & InSignBit) {    // Input sign bit known set
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                              // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne  &= ~NewBits;
+    }
+    return;
+  }
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP: {
+    MVT::ValueType VT = Op.getValueType();
+    unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
+    KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
+    KnownOne  = 0;
+    return;
+  }
+  case ISD::LOAD: {
+    if (ISD::isZEXTLoad(Op.Val)) {
+      LoadSDNode *LD = cast<LoadSDNode>(Op);
+      MVT::ValueType VT = LD->getLoadedVT();
+      KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask;
+    }
+    return;
+  }
+  case ISD::ZERO_EXTEND: {
+    uint64_t InMask  = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+    uint64_t NewBits = (~InMask) & Mask;
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 
+                      KnownOne, Depth+1);
+    KnownZero |= NewBits & Mask;
+    KnownOne  &= ~NewBits;
+    return;
+  }
+  case ISD::SIGN_EXTEND: {
+    MVT::ValueType InVT = Op.getOperand(0).getValueType();
+    unsigned InBits    = MVT::getSizeInBits(InVT);
+    uint64_t InMask    = MVT::getIntVTBitMask(InVT);
+    uint64_t InSignBit = 1ULL << (InBits-1);
+    uint64_t NewBits   = (~InMask) & Mask;
+    uint64_t InDemandedBits = Mask & InMask;
+
+    // If any of the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    if (NewBits & Mask)
+      InDemandedBits |= InSignBit;
+    
+    ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, 
+                      KnownOne, Depth+1);
+    // If the sign bit is known zero or one, the top bits match.
+    if (KnownZero & InSignBit) {
+      KnownZero |= NewBits;
+      KnownOne  &= ~NewBits;
+    } else if (KnownOne & InSignBit) {
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {   // Otherwise, top bits aren't known.
+      KnownOne  &= ~NewBits;
+      KnownZero &= ~NewBits;
+    }
+    return;
+  }
+  case ISD::ANY_EXTEND: {
+    MVT::ValueType VT = Op.getOperand(0).getValueType();
+    ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT),
+                      KnownZero, KnownOne, Depth+1);
+    return;
+  }
+  case ISD::TRUNCATE: {
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
+    KnownZero &= OutMask;
+    KnownOne &= OutMask;
+    break;
+  }
+  case ISD::AssertZext: {
+    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    uint64_t InMask = MVT::getIntVTBitMask(VT);
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 
+                      KnownOne, Depth+1);
+    KnownZero |= (~InMask) & Mask;
+    return;
+  }
+  case ISD::ADD: {
+    // Compute the known-zero bits of both operands.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // The low bits of the output are known zero up to the smaller of the
+    // counts of low known-zero bits on the two operands.  For example,
+    // 8+(X<<3) is known to have the low 3 bits clear.
+    uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero), 
+                                     CountTrailingZeros_64(~KnownZero2));
+    
+    KnownZero = (1ULL << KnownZeroOut) - 1;
+    KnownOne = 0;
+    return;
+  }
+  case ISD::SUB: {
+    ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+    if (!CLHS) return;
+
+    // We know that the top bits of C-X are clear if X contains fewer bits
+    // than C (i.e. no wrap-around can happen).  For example, 20-X is
+    // positive if we can prove that X is >= 0 and < 16.
+    MVT::ValueType VT = CLHS->getValueType(0);
+    if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) {  // sign bit clear
+      unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1);
+      uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit
+      MaskV = ~MaskV & MVT::getIntVTBitMask(VT);
+      ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1);
+
+      // If all of the MaskV bits are known to be zero, then we know the output
+      // top bits are zero, because we now know that the output is from [0-C].
+      if ((KnownZero & MaskV) == MaskV) {
+        unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue());
+        KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask;  // Top bits known zero.
+        KnownOne = 0;   // No one bits known.
+      } else {
+        KnownZero = KnownOne = 0;  // Otherwise, nothing known.
+      }
+    }
+    return;
+  }
+  default:
+    // Allow the target to implement this method for its nodes.
+    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
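+    // The intrinsic case labels below sit inside this 'if' on purpose:
+    // intrinsics jump straight into the block, while other opcodes only
+    // enter it when they are target-specific (>= BUILTIN_OP_END).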
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_VOID:
+      TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this);
+    }
+    return;
+  }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits.  We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information.  For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDOperand Op, unsigned Depth) const {
+  MVT::ValueType VT = Op.getValueType();
+  assert(MVT::isInteger(VT) && "Invalid VT!");
+  unsigned VTBits = MVT::getSizeInBits(VT);
+  unsigned Tmp, Tmp2;
+  
+  if (Depth == 6)
+    return 1;  // Limit search depth.
+
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::AssertSext:
+    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+    return VTBits-Tmp+1;
+  case ISD::AssertZext:
+    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+    return VTBits-Tmp;
+    
+  case ISD::Constant: {
+    uint64_t Val = cast<ConstantSDNode>(Op)->getValue();
+    // If negative, invert the bits, then look at it.
+    if (Val & MVT::getIntVTSignBit(VT))
+      Val = ~Val;
+    
+    // Shift the bits so they are the leading bits in the int64_t.
+    Val <<= 64-VTBits;
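+    // E.g. for the i32 constant -16 (0xFFFFFFF0), inverting gives 0xF in the
+    // low bits; after the shift there are 28 leading zeros, matching the 28
+    // identical top bits of -16.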
+    
+    // Return # leading zeros.  We use 'min' here in case Val was zero before
+    // shifting, so that we don't return '64' for an i32 zero.
+    return std::min(VTBits, CountLeadingZeros_64(Val));
+  }
+    
+  case ISD::SIGN_EXTEND:
+    Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType());
+    return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+    
+  case ISD::SIGN_EXTEND_INREG:
+    // Max of the input and what this extends.
+    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+    Tmp = VTBits-Tmp+1;
+    
+    Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    return std::max(Tmp, Tmp2);
+
+  case ISD::SRA:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    // SRA X, C   -> adds C sign bits.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      Tmp += C->getValue();
+      if (Tmp > VTBits) Tmp = VTBits;
+    }
+    return Tmp;
+  case ISD::SHL:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      // shl destroys sign bits.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (C->getValue() >= VTBits ||      // Bad shift.
+          C->getValue() >= Tmp) break;    // Shifted all sign bits out.
+      return Tmp - C->getValue();
+    }
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:    // NOT is handled here.
+    // Logical binary ops preserve the number of sign bits.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    return std::min(Tmp, Tmp2);
+
+  case ISD::SELECT:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    return std::min(Tmp, Tmp2);
+    
+  case ISD::SETCC:
+    // If setcc returns 0/-1, all bits are sign bits.
+    if (TLI.getSetCCResultContents() ==
+        TargetLowering::ZeroOrNegativeOneSetCCResult)
+      return VTBits;
+    break;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned RotAmt = C->getValue() & (VTBits-1);
+      
+      // Handle rotate right by N like a rotate left by VTBits-N.
+      if (Op.getOpcode() == ISD::ROTR)
+        RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+      // If we aren't rotating out all of the known-in sign bits, return the
+      // number that are left.  This handles rotl(sext(x), 1) for example.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (Tmp > RotAmt+1) return Tmp-RotAmt;
+    }
+    break;
+  case ISD::ADD:
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+      
+    // Special case decrementing a value (ADD X, -1):
+    if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+      if (CRHS->isAllOnesValue()) {
+        uint64_t KnownZero, KnownOne;
+        uint64_t Mask = MVT::getIntVTBitMask(VT);
+        ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+        
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero|1) == Mask)
+          return VTBits;
+        
+        // If we are subtracting one from a positive number, there is no carry
+        // out of the result.
+        if (KnownZero & MVT::getIntVTSignBit(VT))
+          return Tmp;
+      }
+      
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+    return std::min(Tmp, Tmp2)-1;
+    
+  case ISD::SUB:
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+      
+    // Handle NEG.
+    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+      if (CLHS->getValue() == 0) {
+        uint64_t KnownZero, KnownOne;
+        uint64_t Mask = MVT::getIntVTBitMask(VT);
+        ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero|1) == Mask)
+          return VTBits;
+        
+        // If the input is known to be positive (the sign bit is known clear),
+        // the output of the NEG has the same number of sign bits as the input.
+        if (KnownZero & MVT::getIntVTSignBit(VT))
+          return Tmp2;
+        
+        // Otherwise, we treat this like a SUB.
+      }
+    
+    // Sub can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    return std::min(Tmp, Tmp2)-1;
+  case ISD::TRUNCATE:
+    // FIXME: it's tricky to do anything useful for this, but it is an important
+    // case for targets like X86.
+    break;
+  }
+  
+  // Handle LOADX separately here; the EXTLOAD case will fall through.
+  if (Op.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LD = cast<LoadSDNode>(Op);
+    unsigned ExtType = LD->getExtensionType();
+    switch (ExtType) {
+    default: break;
+    case ISD::SEXTLOAD:    // e.g. i16 -> i32 sextload: 17 sign bits known.
+      Tmp = MVT::getSizeInBits(LD->getLoadedVT());
+      return VTBits-Tmp+1;
+    case ISD::ZEXTLOAD:    // e.g. i16 -> i32 zextload: 16 sign bits known.
+      Tmp = MVT::getSizeInBits(LD->getLoadedVT());
+      return VTBits-Tmp;
+    }
+  }
+
+  // Allow the target to implement this method for its nodes.
+  if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+      Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 
+      Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+      Op.getOpcode() == ISD::INTRINSIC_VOID) {
+    unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+    if (NumBits > 1) return NumBits;
+  }
+  
+  // Finally, if we can prove that the top bits of the result are 0's or 1's,
+  // use this information.
+  uint64_t KnownZero, KnownOne;
+  uint64_t Mask = MVT::getIntVTBitMask(VT);
+  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+  
+  uint64_t SignBit = MVT::getIntVTSignBit(VT);
+  if (KnownZero & SignBit) {        // SignBit is 0
+    Mask = KnownZero;
+  } else if (KnownOne & SignBit) {  // SignBit is 1;
+    Mask = KnownOne;
+  } else {
+    // Nothing known.
+    return 1;
+  }
+  
+  // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
+  // the number of identical bits in the top of the input value.
+  Mask ^= ~0ULL;
+  Mask <<= 64-VTBits;
+  // Return # leading zeros.  We use 'min' here in case Mask was zero before
+  // shifting, so that we don't return '64' for an i32 zero.
+  return std::min(VTBits, CountLeadingZeros_64(Mask));
+}
+
+
+/// getNode - Gets or creates the specified node.
+///
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new SDNode(Opcode, SDNode::getSDVTList(VT));
+  CSEMap.InsertNode(N, IP);
+  
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand Operand) {
+  unsigned Tmp1;
+  // Constant fold unary operations with an integer constant operand.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.Val)) {
+    uint64_t Val = C->getValue();
+    switch (Opcode) {
+    default: break;
+    case ISD::SIGN_EXTEND: return getConstant(C->getSignExtended(), VT);
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND: return getConstant(Val, VT);
+    case ISD::TRUNCATE:    return getConstant(Val, VT);
+    case ISD::SINT_TO_FP:  return getConstantFP(C->getSignExtended(), VT);
+    case ISD::UINT_TO_FP:  return getConstantFP(C->getValue(), VT);
+    case ISD::BIT_CONVERT:
+      if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+        return getConstantFP(BitsToFloat(Val), VT);
+      else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+        return getConstantFP(BitsToDouble(Val), VT);
+      break;
+    case ISD::BSWAP:
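+      // E.g. bswap of the i32 constant 0x12345678 folds to 0x78563412.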
+      switch(VT) {
+      default: assert(0 && "Invalid bswap!"); break;
+      case MVT::i16: return getConstant(ByteSwap_16((unsigned short)Val), VT);
+      case MVT::i32: return getConstant(ByteSwap_32((unsigned)Val), VT);
+      case MVT::i64: return getConstant(ByteSwap_64(Val), VT);
+      }
+      break;
+    case ISD::CTPOP:
+      switch(VT) {
+      default: assert(0 && "Invalid ctpop!"); break;
+      case MVT::i1: return getConstant(Val != 0, VT);
+      case MVT::i8: 
+        Tmp1 = (unsigned)Val & 0xFF;
+        return getConstant(CountPopulation_32(Tmp1), VT);
+      case MVT::i16:
+        Tmp1 = (unsigned)Val & 0xFFFF;
+        return getConstant(CountPopulation_32(Tmp1), VT);
+      case MVT::i32:
+        return getConstant(CountPopulation_32((unsigned)Val), VT);
+      case MVT::i64:
+        return getConstant(CountPopulation_64(Val), VT);
+      }
+      break;
+    case ISD::CTLZ:
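+      // CountLeadingZeros_32 counts against a 32-bit width, so the result is
+      // adjusted down for narrow types (by 24 for i8, by 16 for i16).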
+      switch(VT) {
+      default: assert(0 && "Invalid ctlz!"); break;
+      case MVT::i1: return getConstant(Val == 0, VT);
+      case MVT::i8: 
+        Tmp1 = (unsigned)Val & 0xFF;
+        return getConstant(CountLeadingZeros_32(Tmp1)-24, VT);
+      case MVT::i16:
+        Tmp1 = (unsigned)Val & 0xFFFF;
+        return getConstant(CountLeadingZeros_32(Tmp1)-16, VT);
+      case MVT::i32:
+        return getConstant(CountLeadingZeros_32((unsigned)Val), VT);
+      case MVT::i64:
+        return getConstant(CountLeadingZeros_64(Val), VT);
+      }
+      break;
+    case ISD::CTTZ:
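+      // A guard bit just above the type width is OR'd in so that cttz of 0
+      // folds to the type width, e.g. (0 | 0x100) has 8 trailing zeros.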
+      switch(VT) {
+      default: assert(0 && "Invalid cttz!"); break;
+      case MVT::i1: return getConstant(Val == 0, VT);
+      case MVT::i8: 
+        Tmp1 = (unsigned)Val | 0x100;
+        return getConstant(CountTrailingZeros_32(Tmp1), VT);
+      case MVT::i16:
+        Tmp1 = (unsigned)Val | 0x10000;
+        return getConstant(CountTrailingZeros_32(Tmp1), VT);
+      case MVT::i32:
+        return getConstant(CountTrailingZeros_32((unsigned)Val), VT);
+      case MVT::i64:
+        return getConstant(CountTrailingZeros_64(Val), VT);
+      }
+    }
+  }
+
+  // Constant fold unary operations with a floating point constant operand.
+  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.Val))
+    switch (Opcode) {
+    case ISD::FNEG:
+      return getConstantFP(-C->getValue(), VT);
+    case ISD::FABS:
+      return getConstantFP(fabs(C->getValue()), VT);
+    case ISD::FP_ROUND:
+    case ISD::FP_EXTEND:
+      return getConstantFP(C->getValue(), VT);
+    case ISD::FP_TO_SINT:
+      return getConstant((int64_t)C->getValue(), VT);
+    case ISD::FP_TO_UINT:
+      return getConstant((uint64_t)C->getValue(), VT);
+    case ISD::BIT_CONVERT:
+      if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+        return getConstant(FloatToBits(C->getValue()), VT);
+      else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+        return getConstant(DoubleToBits(C->getValue()), VT);
+      break;
+    }
+
+  unsigned OpOpcode = Operand.Val->getOpcode();
+  switch (Opcode) {
+  case ISD::TokenFactor:
+    return Operand;         // Factor of one node?  No factor.
+  case ISD::FP_ROUND:
+  case ISD::FP_EXTEND:
+    assert(MVT::isFloatingPoint(VT) &&
+           MVT::isFloatingPoint(Operand.getValueType()) && "Invalid FP cast!");
+    break;
+  case ISD::SIGN_EXTEND:
+    assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+           "Invalid SIGN_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType() < VT && "Invalid sext node, dst < src!");
+    if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+      return getNode(OpOpcode, VT, Operand.Val->getOperand(0));
+    break;
+  case ISD::ZERO_EXTEND:
+    assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+           "Invalid ZERO_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType() < VT && "Invalid zext node, dst < src!");
+    if (OpOpcode == ISD::ZERO_EXTEND)   // (zext (zext x)) -> (zext x)
+      return getNode(ISD::ZERO_EXTEND, VT, Operand.Val->getOperand(0));
+    break;
+  case ISD::ANY_EXTEND:
+    assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+           "Invalid ANY_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType() < VT && "Invalid anyext node, dst < src!");
+    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+      // (ext (zext x)) -> (zext x)  and  (ext (sext x)) -> (sext x)
+      return getNode(OpOpcode, VT, Operand.Val->getOperand(0));
+    break;
+  case ISD::TRUNCATE:
+    assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+           "Invalid TRUNCATE!");
+    if (Operand.getValueType() == VT) return Operand;   // noop truncate
+    assert(Operand.getValueType() > VT && "Invalid truncate node, src < dst!");
+    if (OpOpcode == ISD::TRUNCATE)
+      return getNode(ISD::TRUNCATE, VT, Operand.Val->getOperand(0));
+    else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+             OpOpcode == ISD::ANY_EXTEND) {
+      // If the source is smaller than the dest, we still need an extend.
+      if (Operand.Val->getOperand(0).getValueType() < VT)
+        return getNode(OpOpcode, VT, Operand.Val->getOperand(0));
+      else if (Operand.Val->getOperand(0).getValueType() > VT)
+        return getNode(ISD::TRUNCATE, VT, Operand.Val->getOperand(0));
+      else
+        return Operand.Val->getOperand(0);
+    }
+    break;
+  case ISD::BIT_CONVERT:
+    // Basic sanity checking.
+    assert(MVT::getSizeInBits(VT) == MVT::getSizeInBits(Operand.getValueType())
+           && "Cannot BIT_CONVERT between types of different sizes!");
+    if (VT == Operand.getValueType()) return Operand;  // noop conversion.
+    if (OpOpcode == ISD::BIT_CONVERT)  // bitconv(bitconv(x)) -> bitconv(x)
+      return getNode(ISD::BIT_CONVERT, VT, Operand.getOperand(0));
+    if (OpOpcode == ISD::UNDEF)
+      return getNode(ISD::UNDEF, VT);
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    assert(MVT::isVector(VT) && !MVT::isVector(Operand.getValueType()) &&
+           MVT::getVectorElementType(VT) == Operand.getValueType() &&
+           "Illegal SCALAR_TO_VECTOR node!");
+    break;
+  case ISD::FNEG:
+    if (OpOpcode == ISD::FSUB)   // -(X-Y) -> (Y-X)
+      return getNode(ISD::FSUB, VT, Operand.Val->getOperand(1),
+                     Operand.Val->getOperand(0));
+    if (OpOpcode == ISD::FNEG)  // --X -> X
+      return Operand.Val->getOperand(0);
+    break;
+  case ISD::FABS:
+    if (OpOpcode == ISD::FNEG)  // abs(-X) -> abs(X)
+      return getNode(ISD::FABS, VT, Operand.Val->getOperand(0));
+    break;
+  }
+
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+    FoldingSetNodeID ID;
+    SDOperand Ops[1] = { Operand };
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    N = new UnarySDNode(Opcode, VTs, Operand);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new UnarySDNode(Opcode, VTs, Operand);
+  }
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand N1, SDOperand N2) {
+#ifndef NDEBUG
+  switch (Opcode) {
+  case ISD::TokenFactor:
+    assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+           N2.getValueType() == MVT::Other && "Invalid token factor!");
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::MULHU:
+  case ISD::MULHS:
+    assert(MVT::isInteger(VT) && "This operator does not apply to FP types!");
+    // fall through
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::SREM:
+    assert(MVT::isInteger(N1.getValueType()) && "Should use F* for FP ops");
+    // fall through.
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    break;
+  case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.
+    assert(N1.getValueType() == VT &&
+           MVT::isFloatingPoint(N1.getValueType()) && 
+           MVT::isFloatingPoint(N2.getValueType()) &&
+           "Invalid FCOPYSIGN!");
+    break;
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:
+    assert(VT == N1.getValueType() &&
+           "Shift operators return type must be the same as their first arg");
+    assert(MVT::isInteger(VT) && MVT::isInteger(N2.getValueType()) &&
+           VT != MVT::i1 && "Shifts only work on integers");
+    break;
+  case ISD::FP_ROUND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg round!");
+    assert(MVT::isFloatingPoint(VT) && MVT::isFloatingPoint(EVT) &&
+           "Cannot FP_ROUND_INREG integer types");
+    assert(EVT <= VT && "Not rounding down!");
+    break;
+  }
+  case ISD::AssertSext:
+  case ISD::AssertZext:
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg extend!");
+    assert(MVT::isInteger(VT) && MVT::isInteger(EVT) &&
+           "Cannot *_EXTEND_INREG FP types");
+    assert(EVT <= VT && "Not extending!");
+  }
+
+  default: break;
+  }
+#endif
+
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val);
+  if (N1C) {
+    if (Opcode == ISD::SIGN_EXTEND_INREG) {
+      int64_t Val = N1C->getValue();
+      unsigned FromBits = MVT::getSizeInBits(cast<VTSDNode>(N2)->getVT());
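+      // Shifting the low FromBits up to the top and arithmetic-shifting them
+      // back replicates the small type's sign bit; e.g. for i8, 0x80 folds
+      // to -128.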
+      Val <<= 64-FromBits;
+      Val >>= 64-FromBits;
+      return getConstant(Val, VT);
+    }
+    
+    if (N2C) {
+      uint64_t C1 = N1C->getValue(), C2 = N2C->getValue();
+      switch (Opcode) {
+      case ISD::ADD: return getConstant(C1 + C2, VT);
+      case ISD::SUB: return getConstant(C1 - C2, VT);
+      case ISD::MUL: return getConstant(C1 * C2, VT);
+      case ISD::UDIV:
+        if (C2) return getConstant(C1 / C2, VT);
+        break;
+      case ISD::UREM :
+        if (C2) return getConstant(C1 % C2, VT);
+        break;
+      case ISD::SDIV :
+        if (C2) return getConstant(N1C->getSignExtended() /
+                                   N2C->getSignExtended(), VT);
+        break;
+      case ISD::SREM :
+        if (C2) return getConstant(N1C->getSignExtended() %
+                                   N2C->getSignExtended(), VT);
+        break;
+      case ISD::AND  : return getConstant(C1 & C2, VT);
+      case ISD::OR   : return getConstant(C1 | C2, VT);
+      case ISD::XOR  : return getConstant(C1 ^ C2, VT);
+      case ISD::SHL  : return getConstant(C1 << C2, VT);
+      case ISD::SRL  : return getConstant(C1 >> C2, VT);
+      case ISD::SRA  : return getConstant(N1C->getSignExtended() >>(int)C2, VT);
+      case ISD::ROTL :
+        // Rotate amounts wrap modulo the bit width; fold a zero amount here
+        // so the complementary shift below is never by the full width.
+        C2 %= MVT::getSizeInBits(VT);
+        if (C2 == 0) return getConstant(C1, VT);
+        return getConstant((C1 << C2) | (C1 >> (MVT::getSizeInBits(VT) - C2)),
+                           VT);
+      case ISD::ROTR :
+        C2 %= MVT::getSizeInBits(VT);
+        if (C2 == 0) return getConstant(C1, VT);
+        return getConstant((C1 >> C2) | (C1 << (MVT::getSizeInBits(VT) - C2)),
+                           VT);
+      default: break;
+      }
+    } else {      // Canonicalize constant to RHS if commutative
+      if (isCommutativeBinOp(Opcode)) {
+        std::swap(N1C, N2C);
+        std::swap(N1, N2);
+      }
+    }
+  }
+
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.Val);
+  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.Val);
+  if (N1CFP) {
+    if (N2CFP) {
+      double C1 = N1CFP->getValue(), C2 = N2CFP->getValue();
+      switch (Opcode) {
+      case ISD::FADD: return getConstantFP(C1 + C2, VT);
+      case ISD::FSUB: return getConstantFP(C1 - C2, VT);
+      case ISD::FMUL: return getConstantFP(C1 * C2, VT);
+      case ISD::FDIV:
+        if (C2) return getConstantFP(C1 / C2, VT);
+        break;
+      case ISD::FREM :
+        if (C2) return getConstantFP(fmod(C1, C2), VT);
+        break;
+      case ISD::FCOPYSIGN: {
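+        // E.g. copysign(1.0, -2.0) folds to -1.0: C1's magnitude, C2's sign.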
+        union {
+          double   F;
+          uint64_t I;
+        } u1;
+        u1.F = C1;
+        if (int64_t(DoubleToBits(C2)) < 0)  // Sign bit of RHS set?
+          u1.I |= 1ULL << 63;      // Set the sign bit of the LHS.
+        else 
+          u1.I &= (1ULL << 63)-1;  // Clear the sign bit of the LHS.
+        return getConstantFP(u1.F, VT);
+      }
+      default: break;
+      }
+    } else {      // Canonicalize constant to RHS if commutative
+      if (isCommutativeBinOp(Opcode)) {
+        std::swap(N1CFP, N2CFP);
+        std::swap(N1, N2);
+      }
+    }
+  }
+  
+  // Canonicalize an UNDEF to the RHS, even over a constant.
+  if (N1.getOpcode() == ISD::UNDEF) {
+    if (isCommutativeBinOp(Opcode)) {
+      std::swap(N1, N2);
+    } else {
+      switch (Opcode) {
+      case ISD::FP_ROUND_INREG:
+      case ISD::SIGN_EXTEND_INREG:
+      case ISD::SUB:
+      case ISD::FSUB:
+      case ISD::FDIV:
+      case ISD::FREM:
+      case ISD::SRA:
+        return N1;     // fold op(undef, arg2) -> undef
+      case ISD::UDIV:
+      case ISD::SDIV:
+      case ISD::UREM:
+      case ISD::SREM:
+      case ISD::SRL:
+      case ISD::SHL:
+        if (!MVT::isVector(VT)) 
+          return getConstant(0, VT);    // fold op(undef, arg2) -> 0
+        // For vectors, we can't easily build an all zero vector, just return
+        // the other operand.
+        return N2;
+      }
+    }
+  }
+  
+  // Fold a bunch of operators when the RHS is undef. 
+  if (N2.getOpcode() == ISD::UNDEF) {
+    switch (Opcode) {
+    case ISD::ADD:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::SUB:
+    case ISD::FADD:
+    case ISD::FSUB:
+    case ISD::FMUL:
+    case ISD::FDIV:
+    case ISD::FREM:
+    case ISD::UDIV:
+    case ISD::SDIV:
+    case ISD::UREM:
+    case ISD::SREM:
+    case ISD::XOR:
+      return N2;       // fold op(arg1, undef) -> undef
+    case ISD::MUL: 
+    case ISD::AND:
+    case ISD::SRL:
+    case ISD::SHL:
+      if (!MVT::isVector(VT)) 
+        return getConstant(0, VT);  // fold op(arg1, undef) -> 0
+      // For vectors, we can't easily build an all zero vector, just return
+      // the LHS.
+      return N1;
+    case ISD::OR:
+      if (!MVT::isVector(VT)) 
+        return getConstant(MVT::getIntVTBitMask(VT), VT);
+      // For vectors, we can't easily build an all one vector, just return
+      // the LHS.
+      return N1;
+    case ISD::SRA:
+      return N1;
+    }
+  }
+
+  // Fold operations.
+  switch (Opcode) {
+  case ISD::TokenFactor:
+    // Fold trivial token factors.
+    if (N1.getOpcode() == ISD::EntryToken) return N2;
+    if (N2.getOpcode() == ISD::EntryToken) return N1;
+    break;
+      
+  case ISD::AND:
+    // (X & 0) -> 0.  This commonly occurs when legalizing i64 values, so it's
+    // worth handling here.
+    if (N2C && N2C->getValue() == 0)
+      return N2;
+    break;
+  case ISD::OR:
+  case ISD::XOR:
+    // (X | 0) -> X and (X ^ 0) -> X.  This commonly occurs when legalizing
+    // i64 values, so it's worth handling here.
+    if (N2C && N2C->getValue() == 0)
+      return N1;
+    break;
+  case ISD::FP_ROUND_INREG:
+    if (cast<VTSDNode>(N2)->getVT() == VT) return N1;  // Not actually rounding.
+    break;
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT();
+    if (EVT == VT) return N1;  // Not actually extending
+    break;
+  }
+  case ISD::EXTRACT_VECTOR_ELT:
+    assert(N2C && "Bad EXTRACT_VECTOR_ELT!");
+
+    // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+    // expanding copies of large vectors from registers.
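+    // E.g. extracting element 5 of (concat_vectors v4i32:A, v4i32:B) becomes
+    // extracting element 5 % 4 == 1 of operand 5 / 4 == 1, i.e. element 1
+    // of B.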
+    if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+        N1.getNumOperands() > 0) {
+      unsigned Factor =
+        MVT::getVectorNumElements(N1.getOperand(0).getValueType());
+      return getNode(ISD::EXTRACT_VECTOR_ELT, VT,
+                     N1.getOperand(N2C->getValue() / Factor),
+                     getConstant(N2C->getValue() % Factor, N2.getValueType()));
+    }
+
+    // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+    // expanding large vector constants.
+    if (N1.getOpcode() == ISD::BUILD_VECTOR)
+      return N1.getOperand(N2C->getValue());
+
+    // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+    // operations are lowered to scalars.
+    if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT)
+      if (ConstantSDNode *IEC = dyn_cast<ConstantSDNode>(N1.getOperand(2))) {
+        if (IEC == N2C)
+          return N1.getOperand(1);
+        else
+          return getNode(ISD::EXTRACT_VECTOR_ELT, VT, N1.getOperand(0), N2);
+      }
+    break;
+  case ISD::EXTRACT_ELEMENT:
+    assert(N2C && (unsigned)N2C->getValue() < 2 && "Bad EXTRACT_ELEMENT!");
+    
+    // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+    // 64-bit integers into 32-bit parts.  Instead of building the extract of
+    // the BUILD_PAIR, only to have legalize rip it apart, just do it now. 
+    if (N1.getOpcode() == ISD::BUILD_PAIR)
+      return N1.getOperand(N2C->getValue());
+    
+    // EXTRACT_ELEMENT of a constant int is also very common.
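+    // E.g. taking element 1 (the high half) of the i64 constant
+    // 0x1122334455667788 as an i32 yields 0x11223344 (a shift by 32).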
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      unsigned Shift = MVT::getSizeInBits(VT) * N2C->getValue();
+      return getConstant(C->getValue() >> Shift, VT);
+    }
+    break;
+
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+#if 0
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+    if (N2.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N2.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, VT, N1, N2.getOperand(0));
+    else if (N2.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N2.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+        unsigned NumBits = MVT::getSizeInBits(VT);
+        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+          return getNode(Opcode, VT, N1, N2.getOperand(0));
+      }
+    break;
+#endif
+  }
+
+  // Memoize this node if possible.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    SDOperand Ops[] = { N1, N2 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    N = new BinarySDNode(Opcode, VTs, N1, N2);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new BinarySDNode(Opcode, VTs, N1, N2);
+  }
+
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand N1, SDOperand N2, SDOperand N3) {
+  // Perform various simplifications.
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val);
+  switch (Opcode) {
+  case ISD::SETCC: {
+    // Use FoldSetCC to simplify SETCC's.
+    SDOperand Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get());
+    if (Simp.Val) return Simp;
+    break;
+  }
+  case ISD::SELECT:
+    if (N1C) {
+      if (N1C->getValue())
+        return N2;             // select true, X, Y -> X
+      else
+        return N3;             // select false, X, Y -> Y
+    }
+
+    if (N2 == N3) return N2;   // select C, X, X -> X
+    break;
+  case ISD::BRCOND:
+    if (N2C) {
+      if (N2C->getValue()) // Unconditional branch
+        return getNode(ISD::BR, MVT::Other, N1, N3);
+      else
+        return N1;         // Never-taken branch
+    }
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    assert(VT == N1.getValueType() && VT == N2.getValueType() &&
+           MVT::isVector(VT) && MVT::isVector(N3.getValueType()) &&
+           N3.getOpcode() == ISD::BUILD_VECTOR &&
+           MVT::getVectorNumElements(VT) == N3.getNumOperands() &&
+           "Illegal VECTOR_SHUFFLE node!");
+    break;
+  case ISD::BIT_CONVERT:
+    // Fold bit_convert nodes from a type to themselves.
+    if (N1.getValueType() == VT)
+      return N1;
+    break;
+  }
+
+  // Memoize node if it doesn't produce a flag.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    SDOperand Ops[] = { N1, N2, N3 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    N = new TernarySDNode(Opcode, VTs, N1, N2, N3);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new TernarySDNode(Opcode, VTs, N1, N2, N3);
+  }
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand N1, SDOperand N2, SDOperand N3,
+                                SDOperand N4) {
+  SDOperand Ops[] = { N1, N2, N3, N4 };
+  return getNode(Opcode, VT, Ops, 4);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                SDOperand N1, SDOperand N2, SDOperand N3,
+                                SDOperand N4, SDOperand N5) {
+  SDOperand Ops[] = { N1, N2, N3, N4, N5 };
+  return getNode(Opcode, VT, Ops, 5);
+}
+
+SDOperand SelectionDAG::getLoad(MVT::ValueType VT,
+                                SDOperand Chain, SDOperand Ptr,
+                                const Value *SV, int SVOffset,
+                                bool isVolatile, unsigned Alignment) {
+  if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for load must be a pointer");
+      Ty = PT->getElementType();
+    }  
+    assert(Ty && "Could not get type information for load");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(ISD::NON_EXTLOAD);
+  ID.AddInteger(VT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new LoadSDNode(Ops, VTs, ISD::UNINDEXED,
+                             ISD::NON_EXTLOAD, VT, SV, SVOffset, Alignment,
+                             isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
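+
+// Illustrative usage sketch (assumes a SelectionDAG 'DAG' and an in-DAG
+// pointer value 'Ptr'; the names are for exposition only).  Passing
+// Alignment == 0 asks getLoad to derive the ABI alignment from the loaded
+// type, and two identical non-flag loads fold to one node via the CSEMap:
+//
+//   SDOperand Ch = DAG.getEntryNode();
+//   SDOperand A  = DAG.getLoad(MVT::i32, Ch, Ptr, NULL, 0, false, 0);
+//   SDOperand B  = DAG.getLoad(MVT::i32, Ch, Ptr, NULL, 0, false, 0);
+//   assert(A == B && "Identical loads are CSE'd to the same node");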
+
+SDOperand SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, MVT::ValueType VT,
+                                   SDOperand Chain, SDOperand Ptr,
+                                   const Value *SV,
+                                   int SVOffset, MVT::ValueType EVT,
+                                   bool isVolatile, unsigned Alignment) {
+  // If they are asking for an extending load from/to the same thing, return a
+  // normal load.
+  if (VT == EVT)
+    ExtType = ISD::NON_EXTLOAD;
+
+  if (MVT::isVector(VT))
+    assert(EVT == MVT::getVectorElementType(VT) && "Invalid vector extload!");
+  else
+    assert(EVT < VT && "Should only be an extending load, not truncating!");
+  assert((ExtType == ISD::EXTLOAD || MVT::isInteger(VT)) &&
+         "Cannot sign/zero extend a FP/Vector load!");
+  assert(MVT::isInteger(VT) == MVT::isInteger(EVT) &&
+         "Cannot convert from FP to Int or Int -> FP!");
+
+  if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for load must be a pointer");
+      Ty = PT->getElementType();
+    }  
+    assert(Ty && "Could not get type information for load");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(ExtType);
+  ID.AddInteger(EVT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new LoadSDNode(Ops, VTs, ISD::UNINDEXED, ExtType, EVT,
+                             SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
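+
+// Sketch (same assumptions as the getLoad example above): load an i8 and
+// sign-extend it to i32 in a single node.  Note that requesting EVT == VT
+// quietly degrades to a normal load rather than asserting:
+//
+//   SDOperand L = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i32, Ch, Ptr,
+//                                NULL, 0, MVT::i8, false, 0);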
+
+SDOperand
+SelectionDAG::getIndexedLoad(SDOperand OrigLoad, SDOperand Base,
+                             SDOperand Offset, ISD::MemIndexedMode AM) {
+  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already a indexed load!");
+  MVT::ValueType VT = OrigLoad.getValueType();
+  SDVTList VTs = getVTList(VT, Base.getValueType(), MVT::Other);
+  SDOperand Ops[] = { LD->getChain(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(AM);
+  ID.AddInteger(LD->getExtensionType());
+  ID.AddInteger(LD->getLoadedVT());
+  ID.AddPointer(LD->getSrcValue());
+  ID.AddInteger(LD->getSrcValueOffset());
+  ID.AddInteger(LD->getAlignment());
+  ID.AddInteger(LD->isVolatile());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new LoadSDNode(Ops, VTs, AM,
+                             LD->getExtensionType(), LD->getLoadedVT(),
+                             LD->getSrcValue(), LD->getSrcValueOffset(),
+                             LD->getAlignment(), LD->isVolatile());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getStore(SDOperand Chain, SDOperand Val,
+                                 SDOperand Ptr, const Value *SV, int SVOffset,
+                                 bool isVolatile, unsigned Alignment) {
+  MVT::ValueType VT = Val.getValueType();
+
+  if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for store must be a pointer");
+      Ty = PT->getElementType();
+    }
+    assert(Ty && "Could not get type information for store");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(false);
+  ID.AddInteger(VT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new StoreSDNode(Ops, VTs, ISD::UNINDEXED, false,
+                              VT, SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getTruncStore(SDOperand Chain, SDOperand Val,
+                                      SDOperand Ptr, const Value *SV,
+                                      int SVOffset, MVT::ValueType SVT,
+                                      bool isVolatile, unsigned Alignment) {
+  MVT::ValueType VT = Val.getValueType();
+  bool isTrunc = VT != SVT;
+
+  assert(VT > SVT && "Not a truncation?");
+  assert(MVT::isInteger(VT) == MVT::isInteger(SVT) &&
+         "Can't do FP-INT conversion!");
+
+  if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for store must be a pointer");
+      Ty = PT->getElementType();
+    }
+    assert(Ty && "Could not get type information for store");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(isTrunc);
+  ID.AddInteger(SVT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new StoreSDNode(Ops, VTs, ISD::UNINDEXED, isTrunc,
+                              SVT, SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
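+
+// Sketch: the mirror image of an extending load.  Assuming 'Val' is an
+// in-DAG i32 value, this stores only its low 8 bits:
+//
+//   SDOperand S = DAG.getTruncStore(Ch, Val, Ptr, NULL, 0, MVT::i8,
+//                                   false, 0);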
+
+SDOperand
+SelectionDAG::getIndexedStore(SDOperand OrigStore, SDOperand Base,
+                              SDOperand Offset, ISD::MemIndexedMode AM) {
+  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already a indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDOperand Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(AM);
+  ID.AddInteger(ST->isTruncatingStore());
+  ID.AddInteger(ST->getStoredVT());
+  ID.AddPointer(ST->getSrcValue());
+  ID.AddInteger(ST->getSrcValueOffset());
+  ID.AddInteger(ST->getAlignment());
+  ID.AddInteger(ST->isVolatile());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new StoreSDNode(Ops, VTs, AM,
+                              ST->isTruncatingStore(), ST->getStoredVT(),
+                              ST->getSrcValue(), ST->getSrcValueOffset(),
+                              ST->getAlignment(), ST->isVolatile());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getVAArg(MVT::ValueType VT,
+                                 SDOperand Chain, SDOperand Ptr,
+                                 SDOperand SV) {
+  SDOperand Ops[] = { Chain, Ptr, SV };
+  return getNode(ISD::VAARG, getVTList(VT, MVT::Other), Ops, 3);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                const SDOperand *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, VT);
+  case 1: return getNode(Opcode, VT, Ops[0]);
+  case 2: return getNode(Opcode, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+  
+  switch (Opcode) {
+  default: break;
+  case ISD::SELECT_CC: {
+    assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+    assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+           "LHS and RHS of condition must have same type!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "True and False arms of SelectCC must have same type!");
+    assert(Ops[2].getValueType() == VT &&
+           "select_cc node must be of same type as true and false value!");
+    break;
+  }
+  case ISD::BR_CC: {
+    assert(NumOps == 5 && "BR_CC takes 5 operands!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "LHS/RHS of comparison should match types!");
+    break;
+  }
+  }
+
+  // Memoize nodes.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    N = new SDNode(Opcode, VTs, Ops, NumOps);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new SDNode(Opcode, VTs, Ops, NumOps);
+  }
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode,
+                                std::vector<MVT::ValueType> &ResultTys,
+                                const SDOperand *Ops, unsigned NumOps) {
+  return getNode(Opcode, getNodeValueTypes(ResultTys), ResultTys.size(),
+                 Ops, NumOps);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode,
+                                const MVT::ValueType *VTs, unsigned NumVTs,
+                                const SDOperand *Ops, unsigned NumOps) {
+  if (NumVTs == 1)
+    return getNode(Opcode, VTs[0], Ops, NumOps);
+  return getNode(Opcode, makeVTList(VTs, NumVTs), Ops, NumOps);
+}  
+  
+SDOperand SelectionDAG::getNode(unsigned Opcode, SDVTList VTList,
+                                const SDOperand *Ops, unsigned NumOps) {
+  if (VTList.NumVTs == 1)
+    return getNode(Opcode, VTList.VTs[0], Ops, NumOps);
+
+  switch (Opcode) {
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+#if 0
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:
+  case ISD::SHL_PARTS:
+    if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, VT, N1, N2, N3.getOperand(0));
+    else if (N3.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+        unsigned NumBits = MVT::getSizeInBits(VT)*2;
+        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+          return getNode(Opcode, VT, N1, N2, N3.getOperand(0));
+      }
+    break;
+#endif
+  }
+
+  // Memoize the node unless it returns a flag.
+  SDNode *N;
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDOperand(E, 0);
+    if (NumOps == 1)
+      N = new UnarySDNode(Opcode, VTList, Ops[0]);
+    else if (NumOps == 2)
+      N = new BinarySDNode(Opcode, VTList, Ops[0], Ops[1]);
+    else if (NumOps == 3)
+      N = new TernarySDNode(Opcode, VTList, Ops[0], Ops[1], Ops[2]);
+    else
+      N = new SDNode(Opcode, VTList, Ops, NumOps);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    if (NumOps == 1)
+      N = new UnarySDNode(Opcode, VTList, Ops[0]);
+    else if (NumOps == 2)
+      N = new BinarySDNode(Opcode, VTList, Ops[0], Ops[1]);
+    else if (NumOps == 3)
+      N = new TernarySDNode(Opcode, VTList, Ops[0], Ops[1], Ops[2]);
+    else
+      N = new SDNode(Opcode, VTList, Ops, NumOps);
+  }
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDVTList SelectionDAG::getVTList(MVT::ValueType VT) {
+  if (!MVT::isExtendedVT(VT))
+    return makeVTList(SDNode::getValueTypeList(VT), 1);
+
+  for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+       E = VTList.end(); I != E; ++I) {
+    if (I->size() == 1 && (*I)[0] == VT)
+      return makeVTList(&(*I)[0], 1);
+  }
+  std::vector<MVT::ValueType> V;
+  V.push_back(VT);
+  VTList.push_front(V);
+  return makeVTList(&(*VTList.begin())[0], 1);
+}
+
+SDVTList SelectionDAG::getVTList(MVT::ValueType VT1, MVT::ValueType VT2) {
+  for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+       E = VTList.end(); I != E; ++I) {
+    if (I->size() == 2 && (*I)[0] == VT1 && (*I)[1] == VT2)
+      return makeVTList(&(*I)[0], 2);
+  }
+  std::vector<MVT::ValueType> V;
+  V.push_back(VT1);
+  V.push_back(VT2);
+  VTList.push_front(V);
+  return makeVTList(&(*VTList.begin())[0], 2);
+}
+SDVTList SelectionDAG::getVTList(MVT::ValueType VT1, MVT::ValueType VT2,
+                                 MVT::ValueType VT3) {
+  for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+       E = VTList.end(); I != E; ++I) {
+    if (I->size() == 3 && (*I)[0] == VT1 && (*I)[1] == VT2 &&
+        (*I)[2] == VT3)
+      return makeVTList(&(*I)[0], 3);
+  }
+  std::vector<MVT::ValueType> V;
+  V.push_back(VT1);
+  V.push_back(VT2);
+  V.push_back(VT3);
+  VTList.push_front(V);
+  return makeVTList(&(*VTList.begin())[0], 3);
+}
+
+SDVTList SelectionDAG::getVTList(const MVT::ValueType *VTs, unsigned NumVTs) {
+  switch (NumVTs) {
+    case 0: assert(0 && "Cannot have nodes without results!");
+    case 1: return getVTList(VTs[0]);
+    case 2: return getVTList(VTs[0], VTs[1]);
+    case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+    default: break;
+  }
+
+  for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+       E = VTList.end(); I != E; ++I) {
+    if (I->size() != NumVTs || VTs[0] != (*I)[0] || VTs[1] != (*I)[1]) continue;
+   
+    bool NoMatch = false;
+    for (unsigned i = 2; i != NumVTs; ++i)
+      if (VTs[i] != (*I)[i]) {
+        NoMatch = true;
+        break;
+      }
+    if (!NoMatch)
+      return makeVTList(&*I->begin(), NumVTs);
+  }
+  
+  VTList.push_front(std::vector<MVT::ValueType>(VTs, VTs+NumVTs));
+  return makeVTList(&*VTList.begin()->begin(), NumVTs);
+}
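+
+// Design note: VTList is a std::list of vectors (rather than one flat
+// vector) because makeVTList hands out raw pointers into the stored
+// vectors.  std::list insertion never relocates existing elements, and the
+// stored vectors are never resized after creation, so those pointers stay
+// valid for the lifetime of the DAG.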
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands.  If the resultant node already exists in the DAG,
+/// this does not modify the specified node; instead it returns the node that
+/// already exists.  If the resultant node does not exist in the DAG, the
+/// input node is returned.  As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand Op) {
+  SDNode *N = InN.Val;
+  assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+  
+  // Check to see if there is no change.
+  if (Op == N->getOperand(0)) return InN;
+  
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+    return SDOperand(Existing, InN.ResNo);
+  
+  // Nope, it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    RemoveNodeFromCSEMaps(N);
+  
+  // Now we update the operands.
+  N->OperandList[0].Val->removeUser(N);
+  Op.Val->addUser(N);
+  N->OperandList[0] = Op;
+  
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
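+
+// Calling-convention sketch: always use the returned SDOperand, since the
+// result may be a pre-existing equivalent node rather than the input.
+// Assuming 'N' is a unary node whose operand should become 'NewOp':
+//
+//   SDOperand Res = DAG.UpdateNodeOperands(SDOperand(N, 0), NewOp);
+//   // If Res.Val != N, an equivalent node already existed and N was left
+//   // untouched; all further work should use Res.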
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand Op1, SDOperand Op2) {
+  SDNode *N = InN.Val;
+  assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+  
+  // Check to see if there is no change.
+  if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+    return InN;   // No operands changed, just return the input node.
+  
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+    return SDOperand(Existing, InN.ResNo);
+  
+  // Nope, it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    RemoveNodeFromCSEMaps(N);
+  
+  // Now we update the operands.
+  if (N->OperandList[0] != Op1) {
+    N->OperandList[0].Val->removeUser(N);
+    Op1.Val->addUser(N);
+    N->OperandList[0] = Op1;
+  }
+  if (N->OperandList[1] != Op2) {
+    N->OperandList[1].Val->removeUser(N);
+    Op2.Val->addUser(N);
+    N->OperandList[1] = Op2;
+  }
+  
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2, SDOperand Op3) {
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2, 
+                   SDOperand Op3, SDOperand Op4) {
+  SDOperand Ops[] = { Op1, Op2, Op3, Op4 };
+  return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2,
+                   SDOperand Op3, SDOperand Op4, SDOperand Op5) {
+  SDOperand Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+  return UpdateNodeOperands(N, Ops, 5);
+}
+
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand *Ops, unsigned NumOps) {
+  SDNode *N = InN.Val;
+  assert(N->getNumOperands() == NumOps &&
+         "Update with wrong number of operands");
+  
+  // Check to see if there is no change.
+  bool AnyChange = false;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    if (Ops[i] != N->getOperand(i)) {
+      AnyChange = true;
+      break;
+    }
+  }
+  
+  // No operands changed, just return the input node.
+  if (!AnyChange) return InN;
+  
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+    return SDOperand(Existing, InN.ResNo);
+  
+  // Nope, it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    RemoveNodeFromCSEMaps(N);
+  
+  // Now we update the operands.
+  for (unsigned i = 0; i != NumOps; ++i) {
+    if (N->OperandList[i] != Ops[i]) {
+      N->OperandList[i].Val->removeUser(N);
+      Ops[i].Val->addUser(N);
+      N->OperandList[i] = Ops[i];
+    }
+  }
+
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
+
+
+/// MorphNodeTo - This frees the operands of the current node, resets the
+/// opcode, types, and operands to the specified value.  This should only be
+/// used by the SelectionDAG class.
+void SDNode::MorphNodeTo(unsigned Opc, SDVTList L,
+                         const SDOperand *Ops, unsigned NumOps) {
+  NodeType = Opc;
+  ValueList = L.VTs;
+  NumValues = L.NumVTs;
+  
+  // Clear the operands list, updating used nodes to remove this from their
+  // use list.
+  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+    I->Val->removeUser(this);
+  
+  // If NumOps is larger than the # of operands we currently have, reallocate
+  // the operand list.
+  if (NumOps > NumOperands) {
+    if (OperandsNeedDelete)
+      delete [] OperandList;
+    OperandList = new SDOperand[NumOps];
+    OperandsNeedDelete = true;
+  }
+  
+  // Assign the new operands.
+  NumOperands = NumOps;
+  
+  for (unsigned i = 0, e = NumOps; i != e; ++i) {
+    OperandList[i] = Ops[i];
+    SDNode *N = OperandList[i].Val;
+    N->Uses.push_back(this);
+  }
+}
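+
+// Note: when NumOps <= NumOperands the existing operand array is reused in
+// place, so morphing to the same or a smaller operand count performs no
+// allocation; only growing past the current count replaces the array.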
+
+/// SelectNodeTo - These are used for target selectors to *mutate* the
+/// specified node to have the specified return type, target opcode, and
+/// operands.  Note that target opcodes are stored as
+/// ISD::BUILTIN_OP_END+TargetOpcode in the node opcode field.
+///
+/// Note that SelectNodeTo returns the resultant node.  If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT) {
+  SDVTList VTs = getVTList(VT);
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, 0, 0);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+   
+  RemoveNodeFromCSEMaps(N);
+  
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, 0, 0);
+
+  CSEMap.InsertNode(N, IP);
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT, SDOperand Op1) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT);
+  SDOperand Ops[] = { Op1 };
+  
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 1);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+                                       
+  RemoveNodeFromCSEMaps(N);
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 1);
+  CSEMap.InsertNode(N, IP);
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT, SDOperand Op1,
+                                   SDOperand Op2) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT);
+  SDOperand Ops[] = { Op1, Op2 };
+  
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+                                       
+  RemoveNodeFromCSEMaps(N);
+  
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+  
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
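+
+// Sketch of the intended use from a generated instruction selector, where
+// 'X86::ADD32rr' stands in for any target instruction opcode:
+//
+//   SDNode *Result = CurDAG->SelectNodeTo(N, X86::ADD32rr, MVT::i32,
+//                                         N->getOperand(0),
+//                                         N->getOperand(1));
+//   // Result may be a previously selected identical node rather than N.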
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT, SDOperand Op1,
+                                   SDOperand Op2, SDOperand Op3) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT);
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+                                       
+  RemoveNodeFromCSEMaps(N);
+  
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT, const SDOperand *Ops,
+                                   unsigned NumOps) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT);
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, NumOps);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+                                       
+  RemoveNodeFromCSEMaps(N);
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, NumOps);
+  
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc, 
+                                   MVT::ValueType VT1, MVT::ValueType VT2,
+                                   SDOperand Op1, SDOperand Op2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  FoldingSetNodeID ID;
+  SDOperand Ops[] = { Op1, Op2 };
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+
+  RemoveNodeFromCSEMaps(N);
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+                                   MVT::ValueType VT1, MVT::ValueType VT2,
+                                   SDOperand Op1, SDOperand Op2, 
+                                   SDOperand Op3) {
+  // If an identical node already exists, use it.
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+  void *IP = 0;
+  if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return ON;
+
+  RemoveNodeFromCSEMaps(N);
+
+  N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+  CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
+
+
+/// getTargetNode - These are used for target selectors to create a new node
+/// with specified return type(s), target opcode, and operands.
+///
+/// Note that getTargetNode returns the resultant node.  If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+                                    SDOperand Op1) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+                                    SDOperand Op1, SDOperand Op2) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+                                    SDOperand Op1, SDOperand Op2,
+                                    SDOperand Op3) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+                                    const SDOperand *Ops, unsigned NumOps) {
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops, NumOps).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, SDOperand Op1) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, &Op1, 1).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, SDOperand Op1,
+                                    SDOperand Op2) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+  SDOperand Ops[] = { Op1, Op2 };
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, 2).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, SDOperand Op1,
+                                    SDOperand Op2, SDOperand Op3) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, 3).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, 
+                                    MVT::ValueType VT2,
+                                    const SDOperand *Ops, unsigned NumOps) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, NumOps).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, MVT::ValueType VT3,
+                                    SDOperand Op1, SDOperand Op2) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3);
+  SDOperand Ops[] = { Op1, Op2 };
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, 2).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2, MVT::ValueType VT3,
+                                    SDOperand Op1, SDOperand Op2,
+                                    SDOperand Op3) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3);
+  SDOperand Ops[] = { Op1, Op2, Op3 };
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, 3).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, 
+                                    MVT::ValueType VT2, MVT::ValueType VT3,
+                                    const SDOperand *Ops, unsigned NumOps) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3);
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, NumOps).Val;
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From/To have a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDOperand FromN, SDOperand ToN,
+                                      std::vector<SDNode*> *Deleted) {
+  SDNode *From = FromN.Val, *To = ToN.Val;
+  assert(From->getNumValues() == 1 && To->getNumValues() == 1 &&
+         "Cannot replace with this method!");
+  assert(From != To && "Cannot replace uses of a node with itself");
+  
+  while (!From->use_empty()) {
+    // Process users until they are all gone.
+    SDNode *U = *From->use_begin();
+    
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(U);
+    
+    for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+         I != E; ++I)
+      if (I->Val == From) {
+        From->removeUser(U);
+        I->Val = To;
+        To->addUser(U);
+      }
+
+    // Now that we have modified U, add it back to the CSE maps.  If it already
+    // exists there, recursively merge the results together.
+    if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) {
+      ReplaceAllUsesWith(U, Existing, Deleted);
+      // U is now dead.
+      if (Deleted) Deleted->push_back(U);
+      DeleteNodeNotInCSEMaps(U);
+    }
+  }
+}
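+
+// Usage sketch: replace every use of one single-result value with another,
+// collecting nodes that die from recursive CSE merging.  The collected
+// pointers are stale (the nodes are already deleted) and are only useful
+// for purging worklists:
+//
+//   std::vector<SDNode*> Dead;
+//   DAG.ReplaceAllUsesWith(OldVal, NewVal, &Dead);
+//   for (unsigned i = 0, e = Dead.size(); i != e; ++i)
+//     Worklist.remove(Dead[i]);   // 'Worklist' is a hypothetical setvector.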
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From/To have matching types and numbers of result
+/// values.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+                                      std::vector<SDNode*> *Deleted) {
+  assert(From != To && "Cannot replace uses of a node with itself");
+  assert(From->getNumValues() == To->getNumValues() &&
+         "Cannot use this version of ReplaceAllUsesWith!");
+  if (From->getNumValues() == 1) {  // If possible, use the faster version.
+    ReplaceAllUsesWith(SDOperand(From, 0), SDOperand(To, 0), Deleted);
+    return;
+  }
+  
+  while (!From->use_empty()) {
+    // Process users until they are all gone.
+    SDNode *U = *From->use_begin();
+    
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(U);
+    
+    for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+         I != E; ++I)
+      if (I->Val == From) {
+        From->removeUser(U);
+        I->Val = To;
+        To->addUser(U);
+      }
+        
+    // Now that we have modified U, add it back to the CSE maps.  If it already
+    // exists there, recursively merge the results together.
+    if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) {
+      ReplaceAllUsesWith(U, Existing, Deleted);
+      // U is now dead.
+      if (Deleted) Deleted->push_back(U);
+      DeleteNodeNotInCSEMaps(U);
+    }
+  }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values.  To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+                                      const SDOperand *To,
+                                      std::vector<SDNode*> *Deleted) {
+  if (From->getNumValues() == 1 && To[0].Val->getNumValues() == 1) {
+    // Degenerate case: forward to the single-result version above.
+    ReplaceAllUsesWith(SDOperand(From, 0), To[0], Deleted);
+    return;
+  }
+
+  while (!From->use_empty()) {
+    // Process users until they are all gone.
+    SDNode *U = *From->use_begin();
+    
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(U);
+    
+    for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+         I != E; ++I)
+      if (I->Val == From) {
+        const SDOperand &ToOp = To[I->ResNo];
+        From->removeUser(U);
+        *I = ToOp;
+        ToOp.Val->addUser(U);
+      }
+        
+    // Now that we have modified U, add it back to the CSE maps.  If it already
+    // exists there, recursively merge the results together.
+    if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) {
+      ReplaceAllUsesWith(U, Existing, Deleted);
+      // U is now dead.
+      if (Deleted) Deleted->push_back(U);
+      DeleteNodeNotInCSEMaps(U);
+    }
+  }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.Val alone.  The Deleted vector is
+/// handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDOperand From, SDOperand To,
+                                             std::vector<SDNode*> &Deleted) {
+  assert(From != To && "Cannot replace a value with itself");
+  // Handle the simple, trivial case efficiently.
+  if (From.Val->getNumValues() == 1 && To.Val->getNumValues() == 1) {
+    ReplaceAllUsesWith(From, To, &Deleted);
+    return;
+  }
+  
+  // Get all of the users of From.Val.  We want these in a nice,
+  // deterministically ordered and uniqued set, so we use a SmallSetVector.
+  SmallSetVector<SDNode*, 16> Users(From.Val->use_begin(), From.Val->use_end());
+
+  while (!Users.empty()) {
+    // We know that this user uses some value of From.  If it is the right
+    // value, update it.
+    SDNode *User = Users.back();
+    Users.pop_back();
+    
+    for (SDOperand *Op = User->OperandList,
+         *E = User->OperandList+User->NumOperands; Op != E; ++Op) {
+      if (*Op == From) {
+        // Okay, we know this user needs to be updated.  Remove its old self
+        // from the CSE maps.
+        RemoveNodeFromCSEMaps(User);
+        
+        // Update all operands that match "From".
+        for (; Op != E; ++Op) {
+          if (*Op == From) {
+            From.Val->removeUser(User);
+            *Op = To;
+            To.Val->addUser(User);
+          }
+        }
+                   
+        // Now that we have modified User, add it back to the CSE maps.  If it
+        // already exists there, recursively merge the results together.
+        if (SDNode *Existing = AddNonLeafNodeToCSEMaps(User)) {
+          unsigned NumDeleted = Deleted.size();
+          ReplaceAllUsesWith(User, Existing, &Deleted);
+          
+          // User is now dead.
+          Deleted.push_back(User);
+          DeleteNodeNotInCSEMaps(User);
+          
+          // We have to be careful here, because ReplaceAllUsesWith could have
+          // deleted a user of From, which means there may be dangling pointers
+          // in the "Users" setvector.  Scan over the deleted node pointers and
+          // remove them from the setvector.
+          for (unsigned i = NumDeleted, e = Deleted.size(); i != e; ++i)
+            Users.remove(Deleted[i]);
+        }
+        break;   // Exit the operand scanning loop.
+      }
+    }
+  }
+}
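+
+// Note: the SmallSetVector above (instead of walking use_begin() directly)
+// matters for correctness.  Replacing one use can recursively delete other
+// users of From.Val; the uniqued worklist lets those deleted users be
+// removed (via Users.remove) before they are ever visited.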
+
+
+/// AssignNodeIds - Assign a unique node id for each node in the DAG based on
+/// their allnodes order. It returns the maximum id.
+unsigned SelectionDAG::AssignNodeIds() {
+  unsigned Id = 0;
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I){
+    SDNode *N = I;
+    N->setNodeId(Id++);
+  }
+  return Id;
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on its topological order.  It returns the maximum id and fills in
+/// TopOrder with the SDNode*s in assigned order.
+unsigned SelectionDAG::AssignTopologicalOrder(std::vector<SDNode*> &TopOrder) {
+  unsigned DAGSize = AllNodes.size();
+  std::vector<unsigned> InDegree(DAGSize);
+  std::vector<SDNode*> Sources;
+
+  // Use a two-pass approach to avoid using a std::map, which is slow.
+  unsigned Id = 0;
+  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I){
+    SDNode *N = I;
+    N->setNodeId(Id++);
+    unsigned Degree = N->use_size();
+    InDegree[N->getNodeId()] = Degree;
+    if (Degree == 0)
+      Sources.push_back(N);
+  }
+
+  TopOrder.clear();
+  while (!Sources.empty()) {
+    SDNode *N = Sources.back();
+    Sources.pop_back();
+    TopOrder.push_back(N);
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+      SDNode *P = I->Val;
+      unsigned Degree = --InDegree[P->getNodeId()];
+      if (Degree == 0)
+        Sources.push_back(P);
+    }
+  }
+
+  // Second pass: assign the actual topological order as node ids.
+  Id = 0;
+  for (std::vector<SDNode*>::iterator TI = TopOrder.begin(),TE = TopOrder.end();
+       TI != TE; ++TI)
+    (*TI)->setNodeId(Id++);
+
+  return Id;
+}
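+
+// This is Kahn's topological sort run over the use relation: a node's
+// in-degree is its use count, so DAG roots are emitted first and every node
+// appears in TopOrder before any of its operands.  Consumer sketch:
+//
+//   std::vector<SDNode*> Order;
+//   DAG.AssignTopologicalOrder(Order);
+//   // Order[i]->getNodeId() == i, and users always precede their operands.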
+
+
+
+//===----------------------------------------------------------------------===//
+//                              SDNode Class
+//===----------------------------------------------------------------------===//
+
+// Out-of-line virtual method to give class a home.
+void SDNode::ANCHOR() {}
+void UnarySDNode::ANCHOR() {}
+void BinarySDNode::ANCHOR() {}
+void TernarySDNode::ANCHOR() {}
+void HandleSDNode::ANCHOR() {}
+void StringSDNode::ANCHOR() {}
+void ConstantSDNode::ANCHOR() {}
+void ConstantFPSDNode::ANCHOR() {}
+void GlobalAddressSDNode::ANCHOR() {}
+void FrameIndexSDNode::ANCHOR() {}
+void JumpTableSDNode::ANCHOR() {}
+void ConstantPoolSDNode::ANCHOR() {}
+void BasicBlockSDNode::ANCHOR() {}
+void SrcValueSDNode::ANCHOR() {}
+void RegisterSDNode::ANCHOR() {}
+void ExternalSymbolSDNode::ANCHOR() {}
+void CondCodeSDNode::ANCHOR() {}
+void VTSDNode::ANCHOR() {}
+void LoadSDNode::ANCHOR() {}
+void StoreSDNode::ANCHOR() {}
+
+HandleSDNode::~HandleSDNode() {
+  SDVTList VTs = { 0, 0 };
+  MorphNodeTo(ISD::HANDLENODE, VTs, 0, 0);  // Drops operand uses.
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
+                                         MVT::ValueType VT, int o)
+  : SDNode(isa<GlobalVariable>(GA) &&
+           cast<GlobalVariable>(GA)->isThreadLocal() ?
+           // Thread Local
+           (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) :
+           // Non Thread Local
+           (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress),
+           getSDVTList(VT)), Offset(o) {
+  TheGlobal = const_cast<GlobalValue*>(GA);
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) {
+  AddNodeIDNode(ID, this);
+}
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+MVT::ValueType *SDNode::getValueTypeList(MVT::ValueType VT) {
+  static MVT::ValueType VTs[MVT::LAST_VALUETYPE];
+  VTs[VT] = VT;
+  return &VTs[VT];
+}
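+
+// Note: the static VTs array gives each simple value type one stable
+// address, which is what lets getVTList(VT) return a pointer for the
+// single-result case without any per-DAG storage.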
+  
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value.  This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  // If there is only one value, this is easy.
+  if (getNumValues() == 1)
+    return use_size() == NUses;
+  if (Uses.size() < NUses) return false;
+
+  SDOperand TheValue(const_cast<SDNode *>(this), Value);
+
+  SmallPtrSet<SDNode*, 32> UsersHandled;
+
+  for (SDNode::use_iterator UI = Uses.begin(), E = Uses.end(); UI != E; ++UI) {
+    SDNode *User = *UI;
+    if (User->getNumOperands() == 1 ||
+        UsersHandled.insert(User))     // First time we've seen this?
+      for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+        if (User->getOperand(i) == TheValue) {
+          if (NUses == 0)
+            return false;   // too many uses
+          --NUses;
+        }
+  }
+
+  // Found exactly the right number of uses?
+  return NUses == 0;
+}
+
+
+/// isOnlyUse - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUse(SDNode *N) const {
+  bool Seen = false;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDNode *User = *I;
+    if (User == this)
+      Seen = true;
+    else
+      return false;
+  }
+
+  return Seen;
+}
+
+/// isOperand - Return true if this node is an operand of N.
+///
+bool SDOperand::isOperand(SDNode *N) const {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (*this == N->getOperand(i))
+      return true;
+  return false;
+}
+
+bool SDNode::isOperand(SDNode *N) const {
+  for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+    if (this == N->OperandList[i].Val)
+      return true;
+  return false;
+}
+
+static void findPredecessor(SDNode *N, const SDNode *P, bool &found,
+                            SmallPtrSet<SDNode *, 32> &Visited) {
+  if (found || !Visited.insert(N))
+    return;
+
+  for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) {
+    SDNode *Op = N->getOperand(i).Val;
+    if (Op == P) {
+      found = true;
+      return;
+    }
+    findPredecessor(Op, P, found, Visited);
+  }
+}
+
+/// isPredecessor - Return true if this node is a predecessor of N. This node
+/// is either an operand of N or it can be reached by recursively traversing
+/// up the operands.
+/// NOTE: this is an expensive method. Use it carefully.
+bool SDNode::isPredecessor(SDNode *N) const {
+  SmallPtrSet<SDNode *, 32> Visited;
+  bool found = false;
+  findPredecessor(N, this, found, Visited);
+  return found;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+  assert(Num < NumOperands && "Invalid child # of SDNode!");
+  return cast<ConstantSDNode>(OperandList[Num])->getValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+  switch (getOpcode()) {
+  default:
+    if (getOpcode() < ISD::BUILTIN_OP_END)
+      return "<<Unknown DAG Node>>";
+    else {
+      if (G) {
+        if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+          if (getOpcode()-ISD::BUILTIN_OP_END < TII->getNumOpcodes())
+            return TII->getName(getOpcode()-ISD::BUILTIN_OP_END);
+
+        TargetLowering &TLI = G->getTargetLoweringInfo();
+        const char *Name =
+          TLI.getTargetNodeName(getOpcode());
+        if (Name) return Name;
+      }
+
+      return "<<Unknown Target Node>>";
+    }
+   
+  case ISD::PCMARKER:      return "PCMarker";
+  case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+  case ISD::SRCVALUE:      return "SrcValue";
+  case ISD::EntryToken:    return "EntryToken";
+  case ISD::TokenFactor:   return "TokenFactor";
+  case ISD::AssertSext:    return "AssertSext";
+  case ISD::AssertZext:    return "AssertZext";
+
+  case ISD::STRING:        return "String";
+  case ISD::BasicBlock:    return "BasicBlock";
+  case ISD::VALUETYPE:     return "ValueType";
+  case ISD::Register:      return "Register";
+
+  case ISD::Constant:      return "Constant";
+  case ISD::ConstantFP:    return "ConstantFP";
+  case ISD::GlobalAddress: return "GlobalAddress";
+  case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+  case ISD::FrameIndex:    return "FrameIndex";
+  case ISD::JumpTable:     return "JumpTable";
+  case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+  case ISD::RETURNADDR: return "RETURNADDR";
+  case ISD::FRAMEADDR: return "FRAMEADDR";
+  case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+  case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+  case ISD::EHSELECTION: return "EHSELECTION";
+  case ISD::EH_RETURN: return "EH_RETURN";
+  case ISD::ConstantPool:  return "ConstantPool";
+  case ISD::ExternalSymbol: return "ExternalSymbol";
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IID = cast<ConstantSDNode>(getOperand(0))->getValue();
+    return Intrinsic::getName((Intrinsic::ID)IID);
+  }
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned IID = cast<ConstantSDNode>(getOperand(1))->getValue();
+    return Intrinsic::getName((Intrinsic::ID)IID);
+  }
+
+  case ISD::BUILD_VECTOR:   return "BUILD_VECTOR";
+  case ISD::TargetConstant: return "TargetConstant";
+  case ISD::TargetConstantFP:return "TargetConstantFP";
+  case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+  case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+  case ISD::TargetFrameIndex: return "TargetFrameIndex";
+  case ISD::TargetJumpTable:  return "TargetJumpTable";
+  case ISD::TargetConstantPool:  return "TargetConstantPool";
+  case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+
+  case ISD::CopyToReg:     return "CopyToReg";
+  case ISD::CopyFromReg:   return "CopyFromReg";
+  case ISD::UNDEF:         return "undef";
+  case ISD::MERGE_VALUES:  return "merge_values";
+  case ISD::INLINEASM:     return "inlineasm";
+  case ISD::LABEL:         return "label";
+  case ISD::HANDLENODE:    return "handlenode";
+  case ISD::FORMAL_ARGUMENTS: return "formal_arguments";
+  case ISD::CALL:          return "call";
+    
+  // Unary operators
+  case ISD::FABS:   return "fabs";
+  case ISD::FNEG:   return "fneg";
+  case ISD::FSQRT:  return "fsqrt";
+  case ISD::FSIN:   return "fsin";
+  case ISD::FCOS:   return "fcos";
+  case ISD::FPOWI:  return "fpowi";
+
+  // Binary operators
+  case ISD::ADD:    return "add";
+  case ISD::SUB:    return "sub";
+  case ISD::MUL:    return "mul";
+  case ISD::MULHU:  return "mulhu";
+  case ISD::MULHS:  return "mulhs";
+  case ISD::SDIV:   return "sdiv";
+  case ISD::UDIV:   return "udiv";
+  case ISD::SREM:   return "srem";
+  case ISD::UREM:   return "urem";
+  case ISD::AND:    return "and";
+  case ISD::OR:     return "or";
+  case ISD::XOR:    return "xor";
+  case ISD::SHL:    return "shl";
+  case ISD::SRA:    return "sra";
+  case ISD::SRL:    return "srl";
+  case ISD::ROTL:   return "rotl";
+  case ISD::ROTR:   return "rotr";
+  case ISD::FADD:   return "fadd";
+  case ISD::FSUB:   return "fsub";
+  case ISD::FMUL:   return "fmul";
+  case ISD::FDIV:   return "fdiv";
+  case ISD::FREM:   return "frem";
+  case ISD::FCOPYSIGN: return "fcopysign";
+
+  case ISD::SETCC:       return "setcc";
+  case ISD::SELECT:      return "select";
+  case ISD::SELECT_CC:   return "select_cc";
+  case ISD::INSERT_VECTOR_ELT:   return "insert_vector_elt";
+  case ISD::EXTRACT_VECTOR_ELT:  return "extract_vector_elt";
+  case ISD::CONCAT_VECTORS:      return "concat_vectors";
+  case ISD::EXTRACT_SUBVECTOR:   return "extract_subvector";
+  case ISD::SCALAR_TO_VECTOR:    return "scalar_to_vector";
+  case ISD::VECTOR_SHUFFLE:      return "vector_shuffle";
+  case ISD::CARRY_FALSE:         return "carry_false";
+  case ISD::ADDC:        return "addc";
+  case ISD::ADDE:        return "adde";
+  case ISD::SUBC:        return "subc";
+  case ISD::SUBE:        return "sube";
+  case ISD::SHL_PARTS:   return "shl_parts";
+  case ISD::SRA_PARTS:   return "sra_parts";
+  case ISD::SRL_PARTS:   return "srl_parts";
+
+  // Conversion operators.
+  case ISD::SIGN_EXTEND: return "sign_extend";
+  case ISD::ZERO_EXTEND: return "zero_extend";
+  case ISD::ANY_EXTEND:  return "any_extend";
+  case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+  case ISD::TRUNCATE:    return "truncate";
+  case ISD::FP_ROUND:    return "fp_round";
+  case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+  case ISD::FP_EXTEND:   return "fp_extend";
+
+  case ISD::SINT_TO_FP:  return "sint_to_fp";
+  case ISD::UINT_TO_FP:  return "uint_to_fp";
+  case ISD::FP_TO_SINT:  return "fp_to_sint";
+  case ISD::FP_TO_UINT:  return "fp_to_uint";
+  case ISD::BIT_CONVERT: return "bit_convert";
+
+    // Control flow instructions
+  case ISD::BR:      return "br";
+  case ISD::BRIND:   return "brind";
+  case ISD::BR_JT:   return "br_jt";
+  case ISD::BRCOND:  return "brcond";
+  case ISD::BR_CC:   return "br_cc";
+  case ISD::RET:     return "ret";
+  case ISD::CALLSEQ_START:  return "callseq_start";
+  case ISD::CALLSEQ_END:    return "callseq_end";
+
+    // Other operators
+  case ISD::LOAD:               return "load";
+  case ISD::STORE:              return "store";
+  case ISD::VAARG:              return "vaarg";
+  case ISD::VACOPY:             return "vacopy";
+  case ISD::VAEND:              return "vaend";
+  case ISD::VASTART:            return "vastart";
+  case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+  case ISD::EXTRACT_ELEMENT:    return "extract_element";
+  case ISD::BUILD_PAIR:         return "build_pair";
+  case ISD::STACKSAVE:          return "stacksave";
+  case ISD::STACKRESTORE:       return "stackrestore";
+    
+  // Block memory operations.
+  case ISD::MEMSET:  return "memset";
+  case ISD::MEMCPY:  return "memcpy";
+  case ISD::MEMMOVE: return "memmove";
+
+  // Bit manipulation
+  case ISD::BSWAP:   return "bswap";
+  case ISD::CTPOP:   return "ctpop";
+  case ISD::CTTZ:    return "cttz";
+  case ISD::CTLZ:    return "ctlz";
+
+  // Debug info
+  case ISD::LOCATION: return "location";
+  case ISD::DEBUG_LOC: return "debug_loc";
+
+  case ISD::CONDCODE:
+    switch (cast<CondCodeSDNode>(this)->get()) {
+    default: assert(0 && "Unknown setcc condition!");
+    case ISD::SETOEQ:  return "setoeq";
+    case ISD::SETOGT:  return "setogt";
+    case ISD::SETOGE:  return "setoge";
+    case ISD::SETOLT:  return "setolt";
+    case ISD::SETOLE:  return "setole";
+    case ISD::SETONE:  return "setone";
+
+    case ISD::SETO:    return "seto";
+    case ISD::SETUO:   return "setuo";
+    case ISD::SETUEQ:  return "setueq";
+    case ISD::SETUGT:  return "setugt";
+    case ISD::SETUGE:  return "setuge";
+    case ISD::SETULT:  return "setult";
+    case ISD::SETULE:  return "setule";
+    case ISD::SETUNE:  return "setune";
+
+    case ISD::SETEQ:   return "seteq";
+    case ISD::SETGT:   return "setgt";
+    case ISD::SETGE:   return "setge";
+    case ISD::SETLT:   return "setlt";
+    case ISD::SETLE:   return "setle";
+    case ISD::SETNE:   return "setne";
+    }
+  }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+  switch (AM) {
+  default:
+    return "";
+  case ISD::PRE_INC:
+    return "<pre-inc>";
+  case ISD::PRE_DEC:
+    return "<pre-dec>";
+  case ISD::POST_INC:
+    return "<post-inc>";
+  case ISD::POST_DEC:
+    return "<post-dec>";
+  }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+  cerr << (void*)this << ": ";
+
+  for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+    if (i) cerr << ",";
+    if (getValueType(i) == MVT::Other)
+      cerr << "ch";
+    else
+      cerr << MVT::getValueTypeString(getValueType(i));
+  }
+  cerr << " = " << getOperationName(G);
+
+  cerr << " ";
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    if (i) cerr << ", ";
+    cerr << (void*)getOperand(i).Val;
+    if (unsigned RN = getOperand(i).ResNo)
+      cerr << ":" << RN;
+  }
+
+  if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+    cerr << "<" << CSDN->getValue() << ">";
+  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+    cerr << "<" << CSDN->getValue() << ">";
+  } else if (const GlobalAddressSDNode *GADN =
+             dyn_cast<GlobalAddressSDNode>(this)) {
+    int offset = GADN->getOffset();
+    cerr << "<";
+    WriteAsOperand(*cerr.stream(), GADN->getGlobal()) << ">";
+    if (offset > 0)
+      cerr << " + " << offset;
+    else
+      cerr << " " << offset;
+  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+    cerr << "<" << FIDN->getIndex() << ">";
+  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+    cerr << "<" << JTDN->getIndex() << ">";
+  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+    int offset = CP->getOffset();
+    if (CP->isMachineConstantPoolEntry())
+      cerr << "<" << *CP->getMachineCPVal() << ">";
+    else
+      cerr << "<" << *CP->getConstVal() << ">";
+    if (offset > 0)
+      cerr << " + " << offset;
+    else
+      cerr << " " << offset;
+  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+    cerr << "<";
+    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+    if (LBB)
+      cerr << LBB->getName() << " ";
+    cerr << (const void*)BBDN->getBasicBlock() << ">";
+  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+    if (G && R->getReg() && MRegisterInfo::isPhysicalRegister(R->getReg())) {
+      cerr << " " <<G->getTarget().getRegisterInfo()->getName(R->getReg());
+    } else {
+      cerr << " #" << R->getReg();
+    }
+  } else if (const ExternalSymbolSDNode *ES =
+             dyn_cast<ExternalSymbolSDNode>(this)) {
+    cerr << "'" << ES->getSymbol() << "'";
+  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+    if (M->getValue())
+      cerr << "<" << M->getValue() << ":" << M->getOffset() << ">";
+    else
+      cerr << "<null:" << M->getOffset() << ">";
+  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+    cerr << ":" << MVT::getValueTypeString(N->getVT());
+  } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+    bool doExt = true;
+    switch (LD->getExtensionType()) {
+    default: doExt = false; break;
+    case ISD::EXTLOAD:
+      cerr << " <anyext ";
+      break;
+    case ISD::SEXTLOAD:
+      cerr << " <sext ";
+      break;
+    case ISD::ZEXTLOAD:
+      cerr << " <zext ";
+      break;
+    }
+    if (doExt)
+      cerr << MVT::getValueTypeString(LD->getLoadedVT()) << ">";
+
+    const char *AM = getIndexedModeName(LD->getAddressingMode());
+    if (AM != "")
+      cerr << " " << AM;
+  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+    if (ST->isTruncatingStore())
+      cerr << " <trunc "
+           << MVT::getValueTypeString(ST->getStoredVT()) << ">";
+
+    const char *AM = getIndexedModeName(ST->getAddressingMode());
+    if (AM != "")
+      cerr << " " << AM;
+  }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (N->getOperand(i).Val->hasOneUse())
+      DumpNodes(N->getOperand(i).Val, indent+2, G);
+    else
+      cerr << "\n" << std::string(indent+2, ' ')
+           << (void*)N->getOperand(i).Val << ": <multiple use>";
+
+
+  cerr << "\n" << std::string(indent, ' ');
+  N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+  cerr << "SelectionDAG has " << AllNodes.size() << " nodes:";
+  std::vector<const SDNode*> Nodes;
+  for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+       I != E; ++I)
+    Nodes.push_back(I);
+  
+  std::sort(Nodes.begin(), Nodes.end());
+
+  for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+    if (!Nodes[i]->hasOneUse() && Nodes[i] != getRoot().Val)
+      DumpNodes(Nodes[i], 2, this);
+  }
+
+  if (getRoot().Val) DumpNodes(getRoot().Val, 2, this);
+
+  cerr << "\n\n";
+}
+
+const Type *ConstantPoolSDNode::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000..9256faa
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,4904 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ParameterAttributes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+          cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+          cl::desc("Pop up a window to show sched dags as they are processed"));
+#else
+static const bool ViewISelDAGs = 0, ViewSchedDAGs = 0;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+namespace {
+  cl::opt<RegisterScheduler::FunctionPassCtor, false,
+          RegisterPassParser<RegisterScheduler> >
+  ISHeuristic("pre-RA-sched",
+              cl::init(&createDefaultScheduler),
+              cl::desc("Instruction schedulers available (before register allocation):"));
+
+  static RegisterScheduler
+  defaultListDAGScheduler("default", "  Best scheduler for the target",
+                          createDefaultScheduler);
+} // namespace
+
+namespace { struct AsmOperandInfo; }
+
+namespace {
+  /// RegsForValue - This struct represents the physical registers that a
+  /// particular value is assigned and the type information about the value.
+  /// This is needed because a value may be promoted into a larger register or
+  /// expanded into multiple smaller registers.
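+  /// For example (illustrative), on a 32-bit target an i64 value may be
+  /// expanded into two i32 registers (RegVT == i32, ValueVT == i64), while an
+  /// i16 value may be promoted into a single i32 register.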
+  struct VISIBILITY_HIDDEN RegsForValue {
+    /// Regs - This list holds the register (for legal and promoted values)
+    /// or register set (for expanded values) that the value should be assigned
+    /// to.
+    std::vector<unsigned> Regs;
+    
+    /// RegVT - The value type of each register.
+    ///
+    MVT::ValueType RegVT;
+    
+    /// ValueVT - The value type of the LLVM value, which may be promoted from
+    /// RegVT or made from merging the two expanded parts.
+    MVT::ValueType ValueVT;
+    
+    RegsForValue() : RegVT(MVT::Other), ValueVT(MVT::Other) {}
+    
+    RegsForValue(unsigned Reg, MVT::ValueType regvt, MVT::ValueType valuevt)
+      : RegVT(regvt), ValueVT(valuevt) {
+        Regs.push_back(Reg);
+    }
+    RegsForValue(const std::vector<unsigned> &regs, 
+                 MVT::ValueType regvt, MVT::ValueType valuevt)
+      : Regs(regs), RegVT(regvt), ValueVT(valuevt) {
+    }
+    
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+    /// this value and returns the result as a ValueVT value.  This uses 
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    SDOperand getCopyFromRegs(SelectionDAG &DAG,
+                              SDOperand &Chain, SDOperand *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+    /// specified value into the registers specified by this object.  This uses 
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    void getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
+                       SDOperand &Chain, SDOperand *Flag) const;
+    
+    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+    /// operand list.  This adds the code marker and includes the number of 
+    /// values added into it.
+    void AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
+                              std::vector<SDOperand> &Ops) const;
+  };
+}
+
+namespace llvm {
+  //===--------------------------------------------------------------------===//
+  /// createDefaultScheduler - This creates an instruction scheduler appropriate
+  /// for the target.
+  ScheduleDAG* createDefaultScheduler(SelectionDAGISel *IS,
+                                      SelectionDAG *DAG,
+                                      MachineBasicBlock *BB) {
+    TargetLowering &TLI = IS->getTargetLowering();
+    
+    if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) {
+      return createTDListDAGScheduler(IS, DAG, BB);
+    } else {
+      assert(TLI.getSchedulingPreference() ==
+           TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
+      return createBURRListDAGScheduler(IS, DAG, BB);
+    }
+  }
+
+
+  //===--------------------------------------------------------------------===//
+  /// FunctionLoweringInfo - This contains information that is global to a
+  /// function that is used when lowering a region of the function.
+  class FunctionLoweringInfo {
+  public:
+    TargetLowering &TLI;
+    Function &Fn;
+    MachineFunction &MF;
+    SSARegMap *RegMap;
+
+    FunctionLoweringInfo(TargetLowering &TLI, Function &Fn,MachineFunction &MF);
+
+    /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+    std::map<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+    /// ValueMap - Since we emit code for the function one basic block at a
+    /// time, we must remember which virtual registers hold values that are
+    /// live across basic blocks.
+    DenseMap<const Value*, unsigned> ValueMap;
+
+    /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+    /// the entry block.  This allows the allocas to be efficiently referenced
+    /// anywhere in the function.
+    std::map<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+    SmallSet<Instruction*, 8> CatchInfoLost;
+    SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+    unsigned MakeReg(MVT::ValueType VT) {
+      return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
+    }
+    
+    /// isExportedInst - Return true if the specified value is an instruction
+    /// exported from its block.
+    bool isExportedInst(const Value *V) {
+      return ValueMap.count(V);
+    }
+
+    unsigned CreateRegForValue(const Value *V);
+    
+    unsigned InitializeRegForValue(const Value *V) {
+      unsigned &R = ValueMap[V];
+      assert(R == 0 && "Already initialized this value register!");
+      return R = CreateRegForValue(V);
+    }
+  };
+}
+
+/// isSelector - Return true if this instruction is a call to the
+/// eh.selector intrinsic.
+static bool isSelector(Instruction *I) {
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+    return II->getIntrinsicID() == Intrinsic::eh_selector;
+  return false;
+}
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a 
+/// switch instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+  if (isa<PHINode>(I)) return true;
+  BasicBlock *BB = I->getParent();
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+    if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI) ||
+        // FIXME: Remove switchinst special case.
+        isa<SwitchInst>(*UI))
+      return true;
+  return false;
+}
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true.  This includes arguments used by switches, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(Argument *A) {
+  BasicBlock *Entry = A->getParent()->begin();
+  for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+    if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+      return false;  // Use not in entry block.
+  return true;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli,
+                                           Function &fn, MachineFunction &mf)
+    : TLI(tli), Fn(fn), MF(mf), RegMap(MF.getSSARegMap()) {
+
+  // Create a vreg for each argument register that is not dead and is used
+  // outside of the entry block for the function.
+  for (Function::arg_iterator AI = Fn.arg_begin(), E = Fn.arg_end();
+       AI != E; ++AI)
+    if (!isOnlyUsedInEntryBlock(AI))
+      InitializeRegForValue(AI);
+
+  // Initialize the mapping of values to registers.  This is only set up for
+  // instruction values that are used outside of the block that defines
+  // them.
+  Function::iterator BB = Fn.begin(), EB = Fn.end();
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+      if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+        const Type *Ty = AI->getAllocatedType();
+        uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+        unsigned Align = 
+          std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+                   AI->getAlignment());
+
+        TySize *= CUI->getZExtValue();   // Get total allocated size.
+        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+        StaticAllocaMap[AI] =
+          MF.getFrameInfo()->CreateStackObject(TySize, Align);
+      }
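+
+  // For example (illustrative): an "alloca [10 x i32]" in the entry block
+  // with a constant array size of 1 becomes a 40-byte fixed frame object at
+  // the preferred alignment of the type, and later uses of the alloca
+  // resolve to its FrameIndex rather than to a virtual register.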
+
+  for (; BB != EB; ++BB)
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+        if (!isa<AllocaInst>(I) ||
+            !StaticAllocaMap.count(cast<AllocaInst>(I)))
+          InitializeRegForValue(I);
+
+  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
+  // also creates the initial PHI MachineInstrs, though none of the input
+  // operands are populated.
+  for (BB = Fn.begin(), EB = Fn.end(); BB != EB; ++BB) {
+    MachineBasicBlock *MBB = new MachineBasicBlock(BB);
+    MBBMap[BB] = MBB;
+    MF.getBasicBlockList().push_back(MBB);
+
+    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+    // appropriate.
+    PHINode *PN;
+    for (BasicBlock::iterator I = BB->begin();(PN = dyn_cast<PHINode>(I)); ++I){
+      if (PN->use_empty()) continue;
+      
+      MVT::ValueType VT = TLI.getValueType(PN->getType());
+      unsigned NumRegisters = TLI.getNumRegisters(VT);
+      unsigned PHIReg = ValueMap[PN];
+      assert(PHIReg && "PHI node does not have an assigned virtual register!");
+      const TargetInstrInfo *TII = TLI.getTargetMachine().getInstrInfo();
+      for (unsigned i = 0; i != NumRegisters; ++i)
+        BuildMI(MBB, TII->get(TargetInstrInfo::PHI), PHIReg+i);
+    }
+  }
+}
+
+/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types.  Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+  MVT::ValueType VT = TLI.getValueType(V->getType());
+  
+  unsigned NumRegisters = TLI.getNumRegisters(VT);
+  MVT::ValueType RegisterVT = TLI.getRegisterType(VT);
+
+  unsigned R = MakeReg(RegisterVT);
+  for (unsigned i = 1; i != NumRegisters; ++i)
+    MakeReg(RegisterVT);
+
+  return R;
+}
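+
+// For example (illustrative): on a target whose widest legal integer type is
+// i32, an i64 value has RegisterVT == i32 and NumRegisters == 2, so
+// CreateRegForValue returns the first of two consecutive i32 vregs and the
+// second half is addressed as R+1.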
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLowering - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+/// Also, targets can overload any lowering method.
+///
+namespace llvm {
+class SelectionDAGLowering {
+  MachineBasicBlock *CurMBB;
+
+  DenseMap<const Value*, SDOperand> NodeMap;
+
+  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
+  /// them up and then emit token factor nodes when possible.  This allows us to
+  /// get simple disambiguation between loads without worrying about alias
+  /// analysis.
+  std::vector<SDOperand> PendingLoads;
+
+  /// Case - A struct to record the Value for a switch case, and the
+  /// case's target basic block.
+  struct Case {
+    Constant* Low;
+    Constant* High;
+    MachineBasicBlock* BB;
+
+    Case() : Low(0), High(0), BB(0) { }
+    Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+      Low(low), High(high), BB(bb) { }
+    uint64_t size() const {
+      uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue();
+      uint64_t rLow  = cast<ConstantInt>(Low)->getSExtValue();
+      return (rHigh - rLow + 1ULL);
+    }
+  };
+
+  struct CaseBits {
+    uint64_t Mask;
+    MachineBasicBlock* BB;
+    unsigned Bits;
+
+    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+      Mask(mask), BB(bb), Bits(bits) { }
+  };
+
+  typedef std::vector<Case>           CaseVector;
+  typedef std::vector<CaseBits>       CaseBitsVector;
+  typedef CaseVector::iterator        CaseItr;
+  typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+  /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+  /// of conditional branches.
+  struct CaseRec {
+    CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+    CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+    /// CaseBB - The MBB in which to emit the compare and branch
+    MachineBasicBlock *CaseBB;
+    /// LT, GE - If nonzero, we know the current case value must be less-than or
+    /// greater-than-or-equal-to these Constants.
+    Constant *LT;
+    Constant *GE;
+    /// Range - A pair of iterators representing the range of case values to be
+    /// processed at this point in the binary search tree.
+    CaseRange Range;
+  };
+
+  typedef std::vector<CaseRec> CaseRecVector;
+
+  /// The comparison function for sorting the switch case values in the vector.
+  /// WARNING: Case ranges should be disjoint!
+  struct CaseCmp {
+    bool operator () (const Case& C1, const Case& C2) {
+      assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+      const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+      const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+      return CI1->getValue().slt(CI2->getValue());
+    }
+  };
+
+  struct CaseBitsCmp {
+    bool operator () (const CaseBits& C1, const CaseBits& C2) {
+      return C1.Bits > C2.Bits;
+    }
+  };
+
+  unsigned Clusterify(CaseVector& Cases, const SwitchInst &SI);
+  
+public:
+  // TLI - This is information that describes the available target features we
+  // need for lowering.  This indicates when operations are unavailable,
+  // implemented with a libcall, etc.
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+  const TargetData *TD;
+
+  /// SwitchCases - Vector of CaseBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<SelectionDAGISel::CaseBlock> SwitchCases;
+  /// JTCases - Vector of JumpTable structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<SelectionDAGISel::JumpTableBlock> JTCases;
+  std::vector<SelectionDAGISel::BitTestBlock> BitTestCases;
+  
+  /// FuncInfo - Information about the function as a whole.
+  ///
+  FunctionLoweringInfo &FuncInfo;
+
+  SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
+                       FunctionLoweringInfo &funcinfo)
+    : TLI(tli), DAG(dag), TD(DAG.getTarget().getTargetData()),
+      FuncInfo(funcinfo) {
+  }
+
+  /// getRoot - Return the current virtual root of the Selection DAG.
+  ///
+  SDOperand getRoot() {
+    if (PendingLoads.empty())
+      return DAG.getRoot();
+
+    if (PendingLoads.size() == 1) {
+      SDOperand Root = PendingLoads[0];
+      DAG.setRoot(Root);
+      PendingLoads.clear();
+      return Root;
+    }
+
+    // Otherwise, we have to make a token factor node.
+    SDOperand Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                                 &PendingLoads[0], PendingLoads.size());
+    PendingLoads.clear();
+    DAG.setRoot(Root);
+    return Root;
+  }
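+  // Illustrative note: if two independent loads are pending here, getRoot()
+  // emits TokenFactor(Load0.chain, Load1.chain) and installs it as the new
+  // root, so later chained nodes are ordered after both loads without
+  // serializing the two loads against each other.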
+
+  SDOperand CopyValueToVirtualRegister(Value *V, unsigned Reg);
+
+  void visit(Instruction &I) { visit(I.getOpcode(), I); }
+
+  void visit(unsigned Opcode, User &I) {
+    // Note: this doesn't use InstVisitor, because it has to work with
+    // ConstantExpr's in addition to instructions.
+    switch (Opcode) {
+    default: assert(0 && "Unknown instruction type encountered!");
+             abort();
+      // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+    case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
+#include "llvm/Instruction.def"
+    }
+  }
+
+  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+
+  SDOperand getLoadFrom(const Type *Ty, SDOperand Ptr,
+                        const Value *SV, SDOperand Root,
+                        bool isVolatile, unsigned Alignment);
+
+  SDOperand getIntPtrConstant(uint64_t Val) {
+    return DAG.getConstant(Val, TLI.getPointerTy());
+  }
+
+  SDOperand getValue(const Value *V);
+
+  void setValue(const Value *V, SDOperand NewN) {
+    SDOperand &N = NodeMap[V];
+    assert(N.Val == 0 && "Already set a value for this node!");
+    N = NewN;
+  }
+  
+  void GetRegistersForValue(AsmOperandInfo &OpInfo, bool HasEarlyClobber,
+                            std::set<unsigned> &OutputRegs, 
+                            std::set<unsigned> &InputRegs);
+
+  void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+                            unsigned Opc);
+  bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
+  void ExportFromCurrentBlock(Value *V);
+  void LowerCallTo(Instruction &I,
+                   const Type *CalledValueTy, unsigned CallingConv,
+                   bool IsTailCall, SDOperand Callee, unsigned OpIdx,
+                   MachineBasicBlock *LandingPad = NULL);
+  
+  // Terminator instructions.
+  void visitRet(ReturnInst &I);
+  void visitBr(BranchInst &I);
+  void visitSwitch(SwitchInst &I);
+  void visitUnreachable(UnreachableInst &I) { /* noop */ }
+
+  // Helpers for visitSwitch
+  bool handleSmallSwitchRange(CaseRec& CR,
+                              CaseRecVector& WorkList,
+                              Value* SV,
+                              MachineBasicBlock* Default);
+  bool handleJTSwitchCase(CaseRec& CR,
+                          CaseRecVector& WorkList,
+                          Value* SV,
+                          MachineBasicBlock* Default);
+  bool handleBTSplitSwitchCase(CaseRec& CR,
+                               CaseRecVector& WorkList,
+                               Value* SV,
+                               MachineBasicBlock* Default);
+  bool handleBitTestsSwitchCase(CaseRec& CR,
+                                CaseRecVector& WorkList,
+                                Value* SV,
+                                MachineBasicBlock* Default);  
+  void visitSwitchCase(SelectionDAGISel::CaseBlock &CB);
+  void visitBitTestHeader(SelectionDAGISel::BitTestBlock &B);
+  void visitBitTestCase(MachineBasicBlock* NextMBB,
+                        unsigned Reg,
+                        SelectionDAGISel::BitTestCase &B);
+  void visitJumpTable(SelectionDAGISel::JumpTable &JT);
+  void visitJumpTableHeader(SelectionDAGISel::JumpTable &JT,
+                            SelectionDAGISel::JumpTableHeader &JTH);
+  
+  // These all get lowered before this pass.
+  void visitInvoke(InvokeInst &I);
+  void visitUnwind(UnwindInst &I);
+
+  void visitBinary(User &I, unsigned OpCode);
+  void visitShift(User &I, unsigned Opcode);
+  void visitAdd(User &I) { 
+    if (I.getType()->isFPOrFPVector())
+      visitBinary(I, ISD::FADD);
+    else
+      visitBinary(I, ISD::ADD);
+  }
+  void visitSub(User &I);
+  void visitMul(User &I) {
+    if (I.getType()->isFPOrFPVector())
+      visitBinary(I, ISD::FMUL);
+    else
+      visitBinary(I, ISD::MUL);
+  }
+  void visitURem(User &I) { visitBinary(I, ISD::UREM); }
+  void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
+  void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
+  void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
+  void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
+  void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
+  void visitAnd (User &I) { visitBinary(I, ISD::AND); }
+  void visitOr  (User &I) { visitBinary(I, ISD::OR); }
+  void visitXor (User &I) { visitBinary(I, ISD::XOR); }
+  void visitShl (User &I) { visitShift(I, ISD::SHL); }
+  void visitLShr(User &I) { visitShift(I, ISD::SRL); }
+  void visitAShr(User &I) { visitShift(I, ISD::SRA); }
+  void visitICmp(User &I);
+  void visitFCmp(User &I);
+  // Visit the conversion instructions
+  void visitTrunc(User &I);
+  void visitZExt(User &I);
+  void visitSExt(User &I);
+  void visitFPTrunc(User &I);
+  void visitFPExt(User &I);
+  void visitFPToUI(User &I);
+  void visitFPToSI(User &I);
+  void visitUIToFP(User &I);
+  void visitSIToFP(User &I);
+  void visitPtrToInt(User &I);
+  void visitIntToPtr(User &I);
+  void visitBitCast(User &I);
+
+  void visitExtractElement(User &I);
+  void visitInsertElement(User &I);
+  void visitShuffleVector(User &I);
+
+  void visitGetElementPtr(User &I);
+  void visitSelect(User &I);
+
+  void visitMalloc(MallocInst &I);
+  void visitFree(FreeInst &I);
+  void visitAlloca(AllocaInst &I);
+  void visitLoad(LoadInst &I);
+  void visitStore(StoreInst &I);
+  void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
+  void visitCall(CallInst &I);
+  void visitInlineAsm(CallInst &I);
+  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
+  void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
+
+  void visitVAStart(CallInst &I);
+  void visitVAArg(VAArgInst &I);
+  void visitVAEnd(CallInst &I);
+  void visitVACopy(CallInst &I);
+
+  void visitMemIntrinsic(CallInst &I, unsigned Op);
+
+  void visitUserOp1(Instruction &I) {
+    assert(0 && "UserOp1 should not exist at instruction selection time!");
+    abort();
+  }
+  void visitUserOp2(Instruction &I) {
+    assert(0 && "UserOp2 should not exist at instruction selection time!");
+    abort();
+  }
+};
+} // end namespace llvm
+
+
+/// getCopyFromParts - Create a value that contains the
+/// specified legal parts combined into the value they represent.
+static SDOperand getCopyFromParts(SelectionDAG &DAG,
+                                  const SDOperand *Parts,
+                                  unsigned NumParts,
+                                  MVT::ValueType PartVT,
+                                  MVT::ValueType ValueVT,
+                                  ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+  if (!MVT::isVector(ValueVT) || NumParts == 1) {
+    SDOperand Val = Parts[0];
+
+    // If the value was expanded, reassemble it from its two parts.
+    if (NumParts > 1) {
+      assert(NumParts == 2 &&
+             "Cannot expand to more than 2 elts yet!");
+      SDOperand Hi = Parts[1];
+      if (!DAG.getTargetLoweringInfo().isLittleEndian())
+        std::swap(Val, Hi);
+      return DAG.getNode(ISD::BUILD_PAIR, ValueVT, Val, Hi);
+    }
+
+    // Otherwise, if the value was promoted or extended, truncate it to the
+    // appropriate type.
+    if (PartVT == ValueVT)
+      return Val;
+  
+    if (MVT::isVector(PartVT)) {
+      assert(MVT::isVector(ValueVT) && "Unknown vector conversion!");
+      return DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+    }
+  
+    if (MVT::isInteger(PartVT) &&
+        MVT::isInteger(ValueVT)) {
+      if (ValueVT < PartVT) {
+        // For a truncate, see if we have any information to
+        // indicate whether the truncated bits will always be
+        // zero or sign-extension.
+        if (AssertOp != ISD::DELETED_NODE)
+          Val = DAG.getNode(AssertOp, PartVT, Val,
+                            DAG.getValueType(ValueVT));
+        return DAG.getNode(ISD::TRUNCATE, ValueVT, Val);
+      } else {
+        return DAG.getNode(ISD::ANY_EXTEND, ValueVT, Val);
+      }
+    }
+  
+    if (MVT::isFloatingPoint(PartVT) &&
+        MVT::isFloatingPoint(ValueVT))
+      return DAG.getNode(ISD::FP_ROUND, ValueVT, Val);
+
+    if (MVT::getSizeInBits(PartVT) == 
+        MVT::getSizeInBits(ValueVT))
+      return DAG.getNode(ISD::BIT_CONVERT, ValueVT, Val);
+
+    assert(0 && "Unknown mismatch!");
+  }
+
+  // Handle a multi-element vector.
+  MVT::ValueType IntermediateVT, RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs =
+    DAG.getTargetLoweringInfo()
+      .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+                              RegisterVT);
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+  assert(RegisterVT == Parts[0].getValueType() &&
+         "Part type doesn't match part!");
+
+  // Assemble the parts into intermediate operands.
+  SmallVector<SDOperand, 8> Ops(NumIntermediates);
+  if (NumIntermediates == NumParts) {
+    // If the register was not expanded, truncate or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      Ops[i] = getCopyFromParts(DAG, &Parts[i], 1,
+                                PartVT, IntermediateVT);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, build the intermediate operands
+    // from the parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      Ops[i] = getCopyFromParts(DAG, &Parts[i * Factor], Factor,
+                                PartVT, IntermediateVT);
+  }
+  
+  // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
+  // operands.
+  return DAG.getNode(MVT::isVector(IntermediateVT) ?
+                       ISD::CONCAT_VECTORS :
+                       ISD::BUILD_VECTOR,
+                     ValueVT, &Ops[0], NumIntermediates);
+}
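+// For example (illustrative): on a little-endian target where i64 is not a
+// legal type, getCopyFromParts(DAG, Parts, 2, MVT::i32, MVT::i64) reassembles
+// the two i32 parts as BUILD_PAIR(Parts[0], Parts[1]); on a big-endian target
+// the two parts are swapped first.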
+
+/// getCopyToParts - Create a series of nodes that contain the
+/// specified value split into legal parts.
+static void getCopyToParts(SelectionDAG &DAG,
+                           SDOperand Val,
+                           SDOperand *Parts,
+                           unsigned NumParts,
+                           MVT::ValueType PartVT) {
+  MVT::ValueType ValueVT = Val.getValueType();
+
+  if (!MVT::isVector(ValueVT) || NumParts == 1) {
+    // If the value was expanded, copy from the parts.
+    if (NumParts > 1) {
+      for (unsigned i = 0; i != NumParts; ++i)
+        Parts[i] = DAG.getNode(ISD::EXTRACT_ELEMENT, PartVT, Val,
+                               DAG.getConstant(i, MVT::i32));
+      if (!DAG.getTargetLoweringInfo().isLittleEndian())
+        std::reverse(Parts, Parts + NumParts);
+      return;
+    }
+
+    // If there is a single part and the types differ, this must be
+    // a promotion or a conversion.
+    if (PartVT != ValueVT) {
+      if (MVT::isVector(PartVT)) {
+        assert(MVT::isVector(ValueVT) &&
+               "Not a vector-vector cast?");
+        Val = DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+      } else if (MVT::isInteger(PartVT) && MVT::isInteger(ValueVT)) {
+        if (PartVT < ValueVT)
+          Val = DAG.getNode(ISD::TRUNCATE, PartVT, Val);
+        else
+          Val = DAG.getNode(ISD::ANY_EXTEND, PartVT, Val);
+      } else if (MVT::isFloatingPoint(PartVT) &&
+                 MVT::isFloatingPoint(ValueVT)) {
+        Val = DAG.getNode(ISD::FP_EXTEND, PartVT, Val);
+      } else if (MVT::getSizeInBits(PartVT) == 
+                 MVT::getSizeInBits(ValueVT)) {
+        Val = DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+      } else {
+        assert(0 && "Unknown mismatch!");
+      }
+    }
+    Parts[0] = Val;
+    return;
+  }
+
+  // Handle a multi-element vector.
+  MVT::ValueType IntermediateVT, RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs =
+    DAG.getTargetLoweringInfo()
+      .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+                              RegisterVT);
+  unsigned NumElements = MVT::getVectorNumElements(ValueVT);
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+  // Split the vector into intermediate operands.
+  SmallVector<SDOperand, 8> Ops(NumIntermediates);
+  for (unsigned i = 0; i != NumIntermediates; ++i)
+    if (MVT::isVector(IntermediateVT))
+      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR,
+                           IntermediateVT, Val,
+                           DAG.getConstant(i * (NumElements / NumIntermediates),
+                                           MVT::i32));
+    else
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                           IntermediateVT, Val, 
+                           DAG.getConstant(i, MVT::i32));
+
+  // Split the intermediate operands into legal parts.
+  if (NumParts == NumIntermediates) {
+    // If the register was not expanded, promote or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      getCopyToParts(DAG, Ops[i], &Parts[i], 1, PartVT);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each intermediate operand
+    // into legal parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      getCopyToParts(DAG, Ops[i], &Parts[i * Factor], Factor, PartVT);
+  }
+}
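+// For example (illustrative): splitting a v4i32 value into two v2i32
+// intermediates extracts each half with EXTRACT_SUBVECTOR at element offsets
+// 0 and 2, then recursively copies each half into its legal parts.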
+
+
+SDOperand SelectionDAGLowering::getValue(const Value *V) {
+  SDOperand &N = NodeMap[V];
+  if (N.Val) return N;
+  
+  const Type *VTy = V->getType();
+  MVT::ValueType VT = TLI.getValueType(VTy);
+  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      visit(CE->getOpcode(), *CE);
+      SDOperand N1 = NodeMap[V];
+      assert(N1.Val && "visit didn't populate the ValueMap!");
+      return N1;
+    } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
+      return N = DAG.getGlobalAddress(GV, VT);
+    } else if (isa<ConstantPointerNull>(C)) {
+      return N = DAG.getConstant(0, TLI.getPointerTy());
+    } else if (isa<UndefValue>(C)) {
+      if (!isa<VectorType>(VTy))
+        return N = DAG.getNode(ISD::UNDEF, VT);
+
+      // Create a BUILD_VECTOR of undef nodes.
+      const VectorType *PTy = cast<VectorType>(VTy);
+      unsigned NumElements = PTy->getNumElements();
+      MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
+
+      SmallVector<SDOperand, 8> Ops;
+      Ops.assign(NumElements, DAG.getNode(ISD::UNDEF, PVT));
+      
+      // Create a BUILD_VECTOR node with the generic vector type.
+      MVT::ValueType VT = MVT::getVectorType(PVT, NumElements);
+      return N = DAG.getNode(ISD::BUILD_VECTOR, VT,
+                             &Ops[0], Ops.size());
+    } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+      return N = DAG.getConstantFP(CFP->getValue(), VT);
+    } else if (const VectorType *PTy = dyn_cast<VectorType>(VTy)) {
+      unsigned NumElements = PTy->getNumElements();
+      MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
+      
+      // Now that we know the number and type of the elements, push a
+      // Constant or ConstantFP node onto the ops list for each element of
+      // the vector constant.
+      SmallVector<SDOperand, 8> Ops;
+      if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+        for (unsigned i = 0; i != NumElements; ++i)
+          Ops.push_back(getValue(CP->getOperand(i)));
+      } else {
+        assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+        SDOperand Op;
+        if (MVT::isFloatingPoint(PVT))
+          Op = DAG.getConstantFP(0, PVT);
+        else
+          Op = DAG.getConstant(0, PVT);
+        Ops.assign(NumElements, Op);
+      }
+      
+      // Create a BUILD_VECTOR node.
+      MVT::ValueType VT = MVT::getVectorType(PVT, NumElements);
+      return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0],
+                                      Ops.size());
+    } else {
+      // Canonicalize all constant ints to be unsigned.
+      return N = DAG.getConstant(cast<ConstantInt>(C)->getZExtValue(),VT);
+    }
+  }
+      
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    std::map<const AllocaInst*, int>::iterator SI =
+    FuncInfo.StaticAllocaMap.find(AI);
+    if (SI != FuncInfo.StaticAllocaMap.end())
+      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+  }
+      
+  unsigned InReg = FuncInfo.ValueMap[V];
+  assert(InReg && "Value not in map!");
+  
+  MVT::ValueType RegisterVT = TLI.getRegisterType(VT);
+  unsigned NumRegs = TLI.getNumRegisters(VT);
+
+  std::vector<unsigned> Regs(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i)
+    Regs[i] = InReg + i;
+
+  RegsForValue RFV(Regs, RegisterVT, VT);
+  SDOperand Chain = DAG.getEntryNode();
+
+  return RFV.getCopyFromRegs(DAG, Chain, NULL);
+}
+
+
+void SelectionDAGLowering::visitRet(ReturnInst &I) {
+  if (I.getNumOperands() == 0) {
+    DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, getRoot()));
+    return;
+  }
+  SmallVector<SDOperand, 8> NewValues;
+  NewValues.push_back(getRoot());
+  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+    SDOperand RetOp = getValue(I.getOperand(i));
+    
+    // If this is an integer return value, we need to promote it ourselves to
+    // the full width of a register, since getCopyToParts and Legalize will use
+    // ANY_EXTEND rather than sign- or zero-extension.
+    // FIXME: C calling convention requires the return type to be promoted to
+    // at least 32-bit. But this is not necessary for non-C calling conventions.
+    if (MVT::isInteger(RetOp.getValueType()) && 
+        RetOp.getValueType() < MVT::i64) {
+      MVT::ValueType TmpVT;
+      if (TLI.getTypeAction(MVT::i32) == TargetLowering::Promote)
+        TmpVT = TLI.getTypeToTransformTo(MVT::i32);
+      else
+        TmpVT = MVT::i32;
+      const FunctionType *FTy = I.getParent()->getParent()->getFunctionType();
+      const ParamAttrsList *Attrs = FTy->getParamAttrs();
+      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+      if (Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt))
+        ExtendKind = ISD::SIGN_EXTEND;
+      if (Attrs && Attrs->paramHasAttr(0, ParamAttr::ZExt))
+        ExtendKind = ISD::ZERO_EXTEND;
+      RetOp = DAG.getNode(ExtendKind, TmpVT, RetOp);
+      NewValues.push_back(RetOp);
+      NewValues.push_back(DAG.getConstant(false, MVT::i32));
+    } else {
+      MVT::ValueType VT = RetOp.getValueType();
+      unsigned NumParts = TLI.getNumRegisters(VT);
+      MVT::ValueType PartVT = TLI.getRegisterType(VT);
+      SmallVector<SDOperand, 4> Parts(NumParts);
+      getCopyToParts(DAG, RetOp, &Parts[0], NumParts, PartVT);
+      for (unsigned i = 0; i < NumParts; ++i) {
+        NewValues.push_back(Parts[i]);
+        NewValues.push_back(DAG.getConstant(false, MVT::i32));
+      }
+    }
+  }
+  DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other,
+                          &NewValues[0], NewValues.size()));
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) {
+  // No need to export constants.
+  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+  
+  // Already exported?
+  if (FuncInfo.isExportedInst(V)) return;
+
+  unsigned Reg = FuncInfo.InitializeRegForValue(V);
+  PendingLoads.push_back(CopyValueToVirtualRegister(V, Reg));
+}
+
+bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V,
+                                                    const BasicBlock *FromBB) {
+  // The operands of the setcc have to be in this block.  We don't know
+  // how to export them from some other block.
+  if (Instruction *VI = dyn_cast<Instruction>(V)) {
+    // Can export from current BB.
+    if (VI->getParent() == FromBB)
+      return true;
+    
+    // Is already exported, noop.
+    return FuncInfo.isExportedInst(V);
+  }
+  
+  // If this is an argument, we can export it if the BB is the entry block or
+  // if it is already exported.
+  if (isa<Argument>(V)) {
+    if (FromBB == &FromBB->getParent()->getEntryBlock())
+      return true;
+
+    // Otherwise, can only export this if it is already exported.
+    return FuncInfo.isExportedInst(V);
+  }
+  
+  // Otherwise, constants can always be exported.
+  return true;
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getParent() == BB;
+  return true;
+}
+
+/// FindMergedConditions - If Cond is an expression like (X & Y) or (X | Y),
+/// lower it as a sequence of conditional branches on the leaf conditions
+/// rather than materializing the boolean value.
+void SelectionDAGLowering::FindMergedConditions(Value *Cond,
+                                                MachineBasicBlock *TBB,
+                                                MachineBasicBlock *FBB,
+                                                MachineBasicBlock *CurBB,
+                                                unsigned Opc) {
+  // If this node is not part of the or/and tree, emit it as a branch.
+  Instruction *BOp = dyn_cast<Instruction>(Cond);
+
+  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || 
+      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    const BasicBlock *BB = CurBB->getBasicBlock();
+    
+    // If the leaf of the tree is a comparison, merge the condition into 
+    // the caseblock.
+    if ((isa<ICmpInst>(Cond) || isa<FCmpInst>(Cond)) &&
+        // The operands of the cmp have to be in this block.  We don't know
+        // how to export them from some other block.  If this is the first block
+        // of the sequence, no exporting is needed.
+        (CurBB == CurMBB ||
+         (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+          isExportableFromCurrentBlock(BOp->getOperand(1), BB)))) {
+      BOp = cast<Instruction>(Cond);
+      ISD::CondCode Condition;
+      if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+        switch (IC->getPredicate()) {
+        default: assert(0 && "Unknown icmp predicate opcode!");
+        case ICmpInst::ICMP_EQ:  Condition = ISD::SETEQ;  break;
+        case ICmpInst::ICMP_NE:  Condition = ISD::SETNE;  break;
+        case ICmpInst::ICMP_SLE: Condition = ISD::SETLE;  break;
+        case ICmpInst::ICMP_ULE: Condition = ISD::SETULE; break;
+        case ICmpInst::ICMP_SGE: Condition = ISD::SETGE;  break;
+        case ICmpInst::ICMP_UGE: Condition = ISD::SETUGE; break;
+        case ICmpInst::ICMP_SLT: Condition = ISD::SETLT;  break;
+        case ICmpInst::ICMP_ULT: Condition = ISD::SETULT; break;
+        case ICmpInst::ICMP_SGT: Condition = ISD::SETGT;  break;
+        case ICmpInst::ICMP_UGT: Condition = ISD::SETUGT; break;
+        }
+      } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+        ISD::CondCode FPC, FOC;
+        switch (FC->getPredicate()) {
+        default: assert(0 && "Unknown fcmp predicate opcode!");
+        case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+        case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+        case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+        case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+        case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+        case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+        case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+        case FCmpInst::FCMP_ORD:   FOC = ISD::SETEQ; FPC = ISD::SETO;   break;
+        case FCmpInst::FCMP_UNO:   FOC = ISD::SETNE; FPC = ISD::SETUO;  break;
+        case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+        case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+        case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+        case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+        case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+        case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+        case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
+        }
+        if (FiniteOnlyFPMath())
+          Condition = FOC;
+        else 
+          Condition = FPC;
+      } else {
+        Condition = ISD::SETEQ; // silence warning.
+        assert(0 && "Unknown compare instruction");
+      }
+      
+      SelectionDAGISel::CaseBlock CB(Condition, BOp->getOperand(0), 
+                                     BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+      SwitchCases.push_back(CB);
+      return;
+    }
+    
+    // Create a CaseBlock record representing this branch.
+    SelectionDAGISel::CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(),
+                                   NULL, TBB, FBB, CurBB);
+    SwitchCases.push_back(CB);
+    return;
+  }
+  
+  
+  //  Create TmpBB after CurBB.
+  MachineFunction::iterator BBI = CurBB;
+  MachineBasicBlock *TmpBB = new MachineBasicBlock(CurBB->getBasicBlock());
+  CurBB->getParent()->getBasicBlockList().insert(++BBI, TmpBB);
+  
+  if (Opc == Instruction::Or) {
+    // Codegen X | Y as:
+    //   jmp_if_X TBB
+    //   jmp TmpBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+  
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+  
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  } else {
+    assert(Opc == Instruction::And && "Unknown merge op!");
+    // Codegen X & Y as:
+    //   jmp_if_X TmpBB
+    //   jmp FBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+    //  This requires creation of TmpBB after CurBB.
+    
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+    
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+static bool 
+ShouldEmitAsBranches(const std::vector<SelectionDAGISel::CaseBlock> &Cases) {
+  if (Cases.size() != 2) return true;
+  
+  // If this is two comparisons of the same values or'd or and'd together, they
+  // will get folded into a single comparison, so don't emit two blocks.
+  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+    return false;
+  }
+  
+  return true;
+}
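+// For example (illustrative): in "if (A < B || A == B)" both cases compare
+// the same operands, so lowering them as setcc+or lets the two comparisons
+// share a single compare instruction; separate branch blocks would not.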
+
+void SelectionDAGLowering::visitBr(BranchInst &I) {
+  // Update machine-CFG edges.
+  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  if (I.isUnconditional()) {
+    // If this is not a fall-through branch, emit the branch.
+    if (Succ0MBB != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+                              DAG.getBasicBlock(Succ0MBB)));
+
+    // Update machine-CFG edges.
+    CurMBB->addSuccessor(Succ0MBB);
+
+    return;
+  }
+
+  // If this condition is one of the special cases we handle, do special stuff
+  // now.
+  Value *CondVal = I.getCondition();
+  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  // If this is a series of conditions that are or'd or and'd together, emit
+  // this as a sequence of branches instead of setcc's with and/or operations.
+  // For example, instead of something like:
+  //     cmp A, B
+  //     C = seteq 
+  //     cmp D, E
+  //     F = setle 
+  //     or C, F
+  //     jnz foo
+  // Emit:
+  //     cmp A, B
+  //     je foo
+  //     cmp D, E
+  //     jle foo
+  //
+  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+    if (BOp->hasOneUse() && 
+        (BOp->getOpcode() == Instruction::And ||
+         BOp->getOpcode() == Instruction::Or)) {
+      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+      // If the compares in later blocks need to use values not currently
+      // exported from this block, export them now.  This block should always
+      // be the first entry.
+      assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+      
+      // Allow some cases to be rejected.
+      if (ShouldEmitAsBranches(SwitchCases)) {
+        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+        }
+        
+        // Emit the branch for this block.
+        visitSwitchCase(SwitchCases[0]);
+        SwitchCases.erase(SwitchCases.begin());
+        return;
+      }
+      
+      // Okay, we decided not to do this, remove any inserted MBB's and clear
+      // SwitchCases.
+      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+        CurMBB->getParent()->getBasicBlockList().erase(SwitchCases[i].ThisBB);
+      
+      SwitchCases.clear();
+    }
+  }
+  
+  // Create a CaseBlock record representing this branch.
+  SelectionDAGISel::CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(),
+                                 NULL, Succ0MBB, Succ1MBB, CurMBB);
+  // Use visitSwitchCase to actually insert the fast branch sequence for this
+  // cond branch.
+  visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGLowering::visitSwitchCase(SelectionDAGISel::CaseBlock &CB) {
+  SDOperand Cond;
+  SDOperand CondLHS = getValue(CB.CmpLHS);
+  
+  // Build the setcc now. 
+  if (CB.CmpMHS == NULL) {
+    // Fold "(X == true)" to X and "(X == false)" to !X to
+    // handle common cases produced by branch lowering.
+    if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ)
+      Cond = CondLHS;
+    else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) {
+      SDOperand True = DAG.getConstant(1, CondLHS.getValueType());
+      Cond = DAG.getNode(ISD::XOR, CondLHS.getValueType(), CondLHS, True);
+    } else
+      Cond = DAG.getSetCC(MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+  } else {
+    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+    uint64_t Low = cast<ConstantInt>(CB.CmpLHS)->getSExtValue();
+    uint64_t High  = cast<ConstantInt>(CB.CmpRHS)->getSExtValue();
+
+    SDOperand CmpOp = getValue(CB.CmpMHS);
+    MVT::ValueType VT = CmpOp.getValueType();
+
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+      Cond = DAG.getSetCC(MVT::i1, CmpOp, DAG.getConstant(High, VT), ISD::SETLE);
+    } else {
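+      // Illustrative note: the code below checks Low <= X <= High with one
+      // unsigned comparison: (X - Low) <=u (High - Low).  E.g. for the range
+      // [10, 20], X == 15 gives 5 <=u 10 (true), while X == 5 wraps around
+      // to a huge unsigned value (false).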
+      SDOperand SUB = DAG.getNode(ISD::SUB, VT, CmpOp, DAG.getConstant(Low, VT));
+      Cond = DAG.getSetCC(MVT::i1, SUB,
+                          DAG.getConstant(High-Low, VT), ISD::SETULE);
+    }
+    
+  }
+  
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+  
+  // If the lhs block is the next block, invert the condition so that we can
+  // fall through to the lhs instead of the rhs block.
+  if (CB.TrueBB == NextBlock) {
+    std::swap(CB.TrueBB, CB.FalseBB);
+    SDOperand True = DAG.getConstant(1, Cond.getValueType());
+    Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
+  }
+  SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), Cond,
+                                 DAG.getBasicBlock(CB.TrueBB));
+  if (CB.FalseBB == NextBlock)
+    DAG.setRoot(BrCond);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond, 
+                            DAG.getBasicBlock(CB.FalseBB)));
+  // Update successor info
+  CurMBB->addSuccessor(CB.TrueBB);
+  CurMBB->addSuccessor(CB.FalseBB);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGLowering::visitJumpTable(SelectionDAGISel::JumpTable &JT) {
+  // Emit the code for the jump table
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  MVT::ValueType PTy = TLI.getPointerTy();
+  SDOperand Index = DAG.getCopyFromReg(getRoot(), JT.Reg, PTy);
+  SDOperand Table = DAG.getJumpTable(JT.JTI, PTy);
+  DAG.setRoot(DAG.getNode(ISD::BR_JT, MVT::Other, Index.getValue(1),
+                          Table, Index));
+  return;
+}
+
+/// visitJumpTableHeader - This function emits the code needed to compute the
+/// jump table index from the value being switched on.
+void SelectionDAGLowering::visitJumpTableHeader(SelectionDAGISel::JumpTable &JT,
+                                         SelectionDAGISel::JumpTableHeader &JTH) {
+  // Subtract the lowest switch case value from the value being switched on
+  // and conditional branch to default mbb if the result is greater than the
+  // difference between smallest and largest cases.
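+  // For example (illustrative), for cases {4, 5, ..., 12} this emits
+  // Index = X - 4 and branches to the default block when Index >u 8;
+  // otherwise Index selects the jump table entry.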
+  SDOperand SwitchOp = getValue(JTH.SValue);
+  MVT::ValueType VT = SwitchOp.getValueType();
+  SDOperand SUB = DAG.getNode(ISD::SUB, VT, SwitchOp,
+                              DAG.getConstant(JTH.First, VT));
+  
+  // The SDNode we just created, which holds the value being switched on
+  // minus the smallest case value, needs to be copied to a virtual register
+  // so it can be used as an index into the jump table in a subsequent basic
+  // block.  This value may be smaller or larger than the target's pointer
+  // type, and may therefore require extension or truncation.
+  if (MVT::getSizeInBits(VT) > MVT::getSizeInBits(TLI.getPointerTy()))
+    SwitchOp = DAG.getNode(ISD::TRUNCATE, TLI.getPointerTy(), SUB);
+  else
+    SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(), SUB);
+  
+  unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDOperand CopyTo = DAG.getCopyToReg(getRoot(), JumpTableReg, SwitchOp);
+  JT.Reg = JumpTableReg;
+
+  // Emit the range check for the jump table, and branch to the default
+  // block for the switch statement if the value being switched on exceeds
+  // the largest case in the switch.
+  SDOperand CMP = DAG.getSetCC(TLI.getSetCCResultTy(), SUB,
+                               DAG.getConstant(JTH.Last-JTH.First,VT),
+                               ISD::SETUGT);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, CopyTo, CMP,
+                                 DAG.getBasicBlock(JT.Default));
+
+  if (JT.MBB == NextBlock)
+    DAG.setRoot(BrCond);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond, 
+                            DAG.getBasicBlock(JT.MBB)));
+
+  return;
+}
+
+/// visitBitTestHeader - This function emits the code needed to produce the
+/// value consumed by the subsequent "bit test" blocks.
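+/// For example (illustrative), for cases {1, 3, 5} sharing one destination,
+/// the header computes 1 << (X - 1); the bit test block then ANDs that value
+/// against the precomputed mask 0b10101 and branches to the shared target
+/// when the result is nonzero.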
+void SelectionDAGLowering::visitBitTestHeader(SelectionDAGISel::BitTestBlock &B) {
+  // Subtract the minimum value
+  SDOperand SwitchOp = getValue(B.SValue);
+  MVT::ValueType VT = SwitchOp.getValueType();
+  SDOperand SUB = DAG.getNode(ISD::SUB, VT, SwitchOp,
+                              DAG.getConstant(B.First, VT));
+
+  // Check range
+  SDOperand RangeCmp = DAG.getSetCC(TLI.getSetCCResultTy(), SUB,
+                                    DAG.getConstant(B.Range, VT),
+                                    ISD::SETUGT);
+
+  SDOperand ShiftOp;
+  if (MVT::getSizeInBits(VT) > MVT::getSizeInBits(TLI.getShiftAmountTy()))
+    ShiftOp = DAG.getNode(ISD::TRUNCATE, TLI.getShiftAmountTy(), SUB);
+  else
+    ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, TLI.getShiftAmountTy(), SUB);
+
+  // Make desired shift
+  SDOperand SwitchVal = DAG.getNode(ISD::SHL, TLI.getPointerTy(),
+                                    DAG.getConstant(1, TLI.getPointerTy()),
+                                    ShiftOp);
+
+  unsigned SwitchReg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDOperand CopyTo = DAG.getCopyToReg(getRoot(), SwitchReg, SwitchVal);
+  B.Reg = SwitchReg;
+
+  SDOperand BrRange = DAG.getNode(ISD::BRCOND, MVT::Other, CopyTo, RangeCmp,
+                                  DAG.getBasicBlock(B.Default));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+  if (MBB == NextBlock)
+    DAG.setRoot(BrRange);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, CopyTo,
+                            DAG.getBasicBlock(MBB)));
+
+  CurMBB->addSuccessor(B.Default);
+  CurMBB->addSuccessor(MBB);
+
+  return;
+}
+
+/// visitBitTestCase - This function produces one "bit test".
+void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
+                                            unsigned Reg,
+                                            SelectionDAGISel::BitTestCase &B) {
+  // Emit bit tests and jumps
+  SDOperand SwitchVal = DAG.getCopyFromReg(getRoot(), Reg, TLI.getPointerTy());
+  
+  SDOperand AndOp = DAG.getNode(ISD::AND, TLI.getPointerTy(),
+                                SwitchVal,
+                                DAG.getConstant(B.Mask,
+                                                TLI.getPointerTy()));
+  SDOperand AndCmp = DAG.getSetCC(TLI.getSetCCResultTy(), AndOp,
+                                  DAG.getConstant(0, TLI.getPointerTy()),
+                                  ISD::SETNE);
+  SDOperand BrAnd = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
+                                AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  if (NextMBB == NextBlock)
+    DAG.setRoot(BrAnd);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrAnd,
+                            DAG.getBasicBlock(NextMBB)));
+
+  CurMBB->addSuccessor(B.TargetBB);
+  CurMBB->addSuccessor(NextMBB);
+
+  return;
+}
+
+void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
+  // Retrieve successors.
+  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  LowerCallTo(I, I.getCalledValue()->getType(),
+              I.getCallingConv(),
+              false,
+              getValue(I.getOperand(0)),
+              3, LandingPad);
+
+  // If the value of the invoke is used outside of its defining block, make it
+  // available as a virtual register.
+  if (!I.use_empty()) {
+    DenseMap<const Value*, unsigned>::iterator VMI = FuncInfo.ValueMap.find(&I);
+    if (VMI != FuncInfo.ValueMap.end())
+      DAG.setRoot(CopyValueToVirtualRegister(&I, VMI->second));
+  }
+
+  // Drop into normal successor.
+  DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+                          DAG.getBasicBlock(Return)));
+
+  // Update successor info
+  CurMBB->addSuccessor(Return);
+  CurMBB->addSuccessor(LandingPad);
+}
+
+void SelectionDAGLowering::visitUnwind(UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
+                                                  CaseRecVector& WorkList,
+                                                  Value* SV,
+                                                  MachineBasicBlock* Default) {
+  Case& BackCase  = *(CR.Range.second-1);
+  
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+  if (Size > 3)
+    return false;  
+  
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();  
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  // TODO: If any two of the cases have the same destination, and if one value
+  // is the same as the other, but has one bit unset that the other has set,
+  // use bit manipulation to do two compares at once.  For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+    
+  // Rearrange the case blocks so that the last one falls through if possible.
+  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+    // The last case block won't fall through into 'NextBlock' if we emit the
+    // branches in this order.  See if rearranging a case value would help.
+    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+      if (I->BB == NextBlock) {
+        std::swap(*I, BackCase);
+        break;
+      }
+    }
+  }
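+  // E.g. with cases {1 -> A, 2 -> B} and A being the next block, the swap
+  // emits the test for A last, so its branch can become a fall-through.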
+  
+  // Create a CaseBlock record representing a conditional branch to
+  // the Case's target mbb if the value being switched on SV is equal
+  // to C.
+  MachineBasicBlock *CurBlock = CR.CaseBB;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    MachineBasicBlock *FallThrough;
+    if (I != E-1) {
+      FallThrough = new MachineBasicBlock(CurBlock->getBasicBlock());
+      CurMF->getBasicBlockList().insert(BBI, FallThrough);
+    } else {
+      // If the last case doesn't match, go to the default block.
+      FallThrough = Default;
+    }
+
+    Value *RHS, *LHS, *MHS;
+    ISD::CondCode CC;
+    if (I->High == I->Low) {
+      // This is just a small case range containing exactly one case.
+      CC = ISD::SETEQ;
+      LHS = SV; RHS = I->High; MHS = NULL;
+    } else {
+      CC = ISD::SETLE;
+      LHS = I->Low; MHS = SV; RHS = I->High;
+    }
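+    // The three-operand form (LHS, MHS, RHS) encodes the range check
+    // Low <= SV <= High; with MHS == NULL it is the simple compare SV == High.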
+    SelectionDAGISel::CaseBlock CB(CC, LHS, RHS, MHS,
+                                   I->BB, FallThrough, CurBlock);
+    
+    // If emitting the first comparison, just call visitSwitchCase to emit the
+    // code into the current block.  Otherwise, push the CaseBlock onto the
+    // vector to be later processed by SDISel, and insert the node's MBB
+    // before the next MBB.
+    if (CurBlock == CurMBB)
+      visitSwitchCase(CB);
+    else
+      SwitchCases.push_back(CB);
+    
+    CurBlock = FallThrough;
+  }
+
+  return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+  return (TLI.isOperationLegal(ISD::BR_JT, MVT::Other) ||
+          TLI.isOperationLegal(ISD::BRIND, MVT::Other));
+}
+  
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
+                                              CaseRecVector& WorkList,
+                                              Value* SV,
+                                              MachineBasicBlock* Default) {
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  int64_t First = cast<ConstantInt>(FrontCase.Low)->getSExtValue();
+  int64_t Last  = cast<ConstantInt>(BackCase.High)->getSExtValue();
+
+  uint64_t TSize = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  if (!areJTsAllowed(TLI) || TSize <= 3)
+    return false;
+  
+  double Density = (double)TSize / (double)((Last - First) + 1ULL);  
+  if (Density < 0.4)
+    return false;
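+  // E.g. case values {0, 1, 2, 100} give TSize == 4 over a span of 101
+  // (density ~0.04), so no jump table; values {0..9} give density 1.0.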
+
+  DOUT << "Lowering jump table\n"
+       << "First entry: " << First << ". Last entry: " << Last << "\n"
+       << "Size: " << TSize << ". Density: " << Density << "\n\n";
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Create a new basic block to hold the code for loading the address
+  // of the jump table, and jumping to it.  Update successor information;
+  // we will either branch to the default case for the switch, or the jump
+  // table.
+  MachineBasicBlock *JumpTableBB = new MachineBasicBlock(LLVMBB);
+  CurMF->getBasicBlockList().insert(BBI, JumpTableBB);
+  CR.CaseBB->addSuccessor(Default);
+  CR.CaseBB->addSuccessor(JumpTableBB);
+                
+  // Build a vector of destination BBs, corresponding to each target
+  // of the jump table. If the value of the jump table slot corresponds to
+  // a case statement, push the case's BB onto the vector, otherwise, push
+  // the default BB.
+  std::vector<MachineBasicBlock*> DestBBs;
+  int64_t TEI = First;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+    int64_t Low = cast<ConstantInt>(I->Low)->getSExtValue();
+    int64_t High = cast<ConstantInt>(I->High)->getSExtValue();
+    
+    if ((Low <= TEI) && (TEI <= High)) {
+      DestBBs.push_back(I->BB);
+      if (TEI==High)
+        ++I;
+    } else {
+      DestBBs.push_back(Default);
+    }
+  }
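+  // E.g. with First == 1 and cases {1 -> BB1, 3 -> BB3}, DestBBs becomes
+  // [BB1, Default, BB3]: the hole at value 2 is filled with the default block.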
+  
+  // Update successor info. Add one edge to each unique successor.
+  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());  
+  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), 
+         E = DestBBs.end(); I != E; ++I) {
+    if (!SuccsHandled[(*I)->getNumber()]) {
+      SuccsHandled[(*I)->getNumber()] = true;
+      JumpTableBB->addSuccessor(*I);
+    }
+  }
+      
+  // Create a jump table index for this jump table, or return an existing
+  // one.
+  unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
+  
+  // Set the jump table information so that we can codegen it as a second
+  // MachineBasicBlock
+  SelectionDAGISel::JumpTable JT(-1U, JTI, JumpTableBB, Default);
+  SelectionDAGISel::JumpTableHeader JTH(First, Last, SV, CR.CaseBB,
+                                        (CR.CaseBB == CurMBB));
+  if (CR.CaseBB == CurMBB)
+    visitJumpTableHeader(JT, JTH);
+        
+  JTCases.push_back(SelectionDAGISel::JumpTableBlock(JTH, JT));
+
+  return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
+                                                   CaseRecVector& WorkList,
+                                                   Value* SV,
+                                                   MachineBasicBlock* Default) {
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();  
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+
+  int64_t First = cast<ConstantInt>(FrontCase.Low)->getSExtValue();
+  int64_t Last  = cast<ConstantInt>(BackCase.High)->getSExtValue();
+  double FMetric = 0;
+  CaseItr Pivot = CR.Range.first + Size/2;
+
+  // Select the optimal pivot, maximizing the summed density of the LHS and
+  // RHS. This (heuristically) allows us to emit jump tables later.
+  uint64_t TSize = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  uint64_t LSize = FrontCase.size();
+  uint64_t RSize = TSize-LSize;
+  DOUT << "Selecting best pivot: \n"
+       << "First: " << First << ", Last: " << Last <<"\n"
+       << "LSize: " << LSize << ", RSize: " << RSize << "\n";
+  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+       J!=E; ++I, ++J) {
+    int64_t LEnd = cast<ConstantInt>(I->High)->getSExtValue();
+    int64_t RBegin = cast<ConstantInt>(J->Low)->getSExtValue();
+    assert((RBegin-LEnd>=1) && "Invalid case distance");
+    double LDensity = (double)LSize / (double)((LEnd - First) + 1ULL);
+    double RDensity = (double)RSize / (double)((Last - RBegin) + 1ULL);
+    double Metric = Log2_64(RBegin-LEnd)*(LDensity+RDensity);
+    // Should always split in some non-trivial place
+    DOUT <<"=>Step\n"
+         << "LEnd: " << LEnd << ", RBegin: " << RBegin << "\n"
+         << "LDensity: " << LDensity << ", RDensity: " << RDensity << "\n"
+         << "Metric: " << Metric << "\n"; 
+    if (FMetric < Metric) {
+      Pivot = J;
+      FMetric = Metric;
+      DOUT << "Current metric set to: " << FMetric << "\n";
+    }
+
+    LSize += J->size();
+    RSize -= J->size();
+  }
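+  // E.g. for case values {0, 1, 2, 100, 101} the split with the best metric
+  // lands in the large gap between 2 and 100, leaving two fully dense
+  // subranges that can each be lowered to a jump table.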
+  if (areJTsAllowed(TLI)) {
+    // If our case is dense we *really* should handle it earlier!
+    assert((FMetric > 0) && "Should handle dense range earlier!");
+  } else {
+    Pivot = CR.Range.first + Size/2;
+  }
+  
+  CaseRange LHSR(CR.Range.first, Pivot);
+  CaseRange RHSR(Pivot, CR.Range.second);
+  Constant *C = Pivot->Low;
+  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+      
+  // We know that we branch to the LHS if the Value being switched on is
+  // less than the Pivot value, C.  We use this to optimize our binary 
+  // tree a bit, by recognizing that if SV is greater than or equal to the
+  // LHS's Case Value, and that Case Value is exactly one less than the 
+  // Pivot's Value, then we can branch directly to the LHS's Target,
+  // rather than creating a leaf node for it.
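+  // E.g. if SV is already known to be >= 4, the lone LHS case ends at 4, and
+  // the pivot value is 5, then SV < 5 forces SV == 4.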
+  if ((LHSR.second - LHSR.first) == 1 &&
+      LHSR.first->High == CR.GE &&
+      cast<ConstantInt>(C)->getSExtValue() ==
+      (cast<ConstantInt>(CR.GE)->getSExtValue() + 1LL)) {
+    TrueBB = LHSR.first->BB;
+  } else {
+    TrueBB = new MachineBasicBlock(LLVMBB);
+    CurMF->getBasicBlockList().insert(BBI, TrueBB);
+    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+  }
+  
+  // Similar to the optimization above, if the Value being switched on is
+  // known to be less than the Constant CR.LT, and the current Case Value
+  // is CR.LT - 1, then we can branch directly to the target block for
+  // the current Case Value, rather than emitting a RHS leaf node for it.
+  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+      cast<ConstantInt>(RHSR.first->Low)->getSExtValue() ==
+      (cast<ConstantInt>(CR.LT)->getSExtValue() - 1LL)) {
+    FalseBB = RHSR.first->BB;
+  } else {
+    FalseBB = new MachineBasicBlock(LLVMBB);
+    CurMF->getBasicBlockList().insert(BBI, FalseBB);
+    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node (TrueBB) if the value being switched on (SV) is less
+  // than C.  Otherwise, branch to the RHS node (FalseBB).
+  SelectionDAGISel::CaseBlock CB(ISD::SETLT, SV, C, NULL,
+                                 TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == CurMBB)
+    visitSwitchCase(CB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and the range spans fewer values than the machine word bitwidth, encode the
+/// case range as a series of masks and emit bit tests against those masks.
+bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
+                                                    CaseRecVector& WorkList,
+                                                    Value* SV,
+                                                    MachineBasicBlock* Default){
+  unsigned IntPtrBits = MVT::getSizeInBits(TLI.getPointerTy());
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();  
+
+  unsigned numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I) {
+    // A single case counts as one comparison; a case range counts as two.
+    if (I->Low == I->High)
+      numCmps +=1;
+    else
+      numCmps +=2;
+  }
+    
+  // Count unique destinations
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+      return false;
+  }
+  DOUT << "Total number of unique destinations: " << Dests.size() << "\n"
+       << "Total number of comparisons: " << numCmps << "\n";
+  
+  // Compute span of values.
+  Constant* minValue = FrontCase.Low;
+  Constant* maxValue = BackCase.High;
+  uint64_t range = cast<ConstantInt>(maxValue)->getSExtValue() -
+                   cast<ConstantInt>(minValue)->getSExtValue();
+  DOUT << "Compare range: " << range << "\n"
+       << "Low bound: " << cast<ConstantInt>(minValue)->getSExtValue() << "\n"
+       << "High bound: " << cast<ConstantInt>(maxValue)->getSExtValue() << "\n";
+  
+  if (range>=IntPtrBits ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
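+  // E.g. a single-destination switch over {0, 3, 5, 6, 7} needs numCmps == 4
+  // but fits in the single mask 0b11101001, so bit tests are a win.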
+  
+  DOUT << "Emitting bit tests\n";
+  int64_t lowBound = 0;
+    
+  // If all the case values are non-negative and already fit below the machine
+  // word bitwidth, build the masks against the absolute values and optimize
+  // away the subtraction of minValue.
+  if (cast<ConstantInt>(minValue)->getSExtValue() >= 0 &&
+      cast<ConstantInt>(maxValue)->getSExtValue() <  IntPtrBits) {
+    range = cast<ConstantInt>(maxValue)->getSExtValue();
+  } else {
+    lowBound = cast<ConstantInt>(minValue)->getSExtValue();
+  }
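+  // E.g. for case values {1, 3, 9} on a 32-bit target the masks are built
+  // against the absolute values (bits 1, 3 and 9) and no subtraction is
+  // needed.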
+    
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+    
+    if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0));
+      count++;
+    }
+    
+    uint64_t lo = cast<ConstantInt>(I->Low)->getSExtValue() - lowBound;
+    uint64_t hi = cast<ConstantInt>(I->High)->getSExtValue() - lowBound;
+    
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |=  1ULL << j;
+      CasesBits[i].Bits++;
+    }
+      
+  }
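+  // E.g. a case range [2, 4] with lowBound == 0 contributes Mask == 0b11100
+  // (bits 2 through 4) and Bits == 3.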
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+  
+  SelectionDAGISel::BitTestInfo BTC;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  DOUT << "Cases:\n";
+  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+    DOUT << "Mask: " << CasesBits[i].Mask << ", Bits: " << CasesBits[i].Bits
+         << ", BB: " << CasesBits[i].BB << "\n";
+
+    MachineBasicBlock *CaseBB = new MachineBasicBlock(LLVMBB);
+    CurMF->getBasicBlockList().insert(BBI, CaseBB);
+    BTC.push_back(SelectionDAGISel::BitTestCase(CasesBits[i].Mask,
+                                                CaseBB,
+                                                CasesBits[i].BB));
+  }
+  
+  SelectionDAGISel::BitTestBlock BTB(lowBound, range, SV,
+                                     -1U, (CR.CaseBB == CurMBB),
+                                     CR.CaseBB, Default, BTC);
+
+  if (CR.CaseBB == CurMBB)
+    visitBitTestHeader(BTB);
+  
+  BitTestCases.push_back(BTB);
+
+  return true;
+}
+
+
+// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+unsigned SelectionDAGLowering::Clusterify(CaseVector& Cases,
+                                          const SwitchInst& SI) {
+  unsigned numCmps = 0;
+
+  // Start with "simple" cases
+  for (unsigned i = 1; i < SI.getNumSuccessors(); ++i) {
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    Cases.push_back(Case(SI.getSuccessorValue(i),
+                         SI.getSuccessorValue(i),
+                         SMBB));
+  }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge neighboring cases into clusters.
+  if (Cases.size()>=2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it
+    for (CaseItr I=Cases.begin(), J=++(Cases.begin()); J!=Cases.end(); ) {
+      int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+      int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
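+  // E.g. cases 1, 2, 3 -> A and 5 -> B become the clusters [1, 3] -> A and
+  // [5, 5] -> B, for a total of 3 compares.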
+
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {  
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // If there is only the default destination, branch to it if it is not the
+  // next basic block.  Otherwise, just fall through.
+  if (SI.getNumOperands() == 2) {
+    // Update machine-CFG edges.
+
+    // If this is not a fall-through branch, emit the branch.
+    if (Default != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+                              DAG.getBasicBlock(Default)));
+
+    CurMBB->addSuccessor(Default);
+    return;
+  }
+  
+  // If there are any non-default case statements, create a vector of Cases
+  // representing each one, and sort the vector so that we can efficiently
+  // create a binary search tree from them.
+  CaseVector Cases;
+  unsigned numCmps = Clusterify(Cases, SI);
+  DOUT << "Clusterify finished. Total clusters: " << Cases.size()
+       << ". Total compares: " << numCmps << "\n";
+
+  // Get the Value to be switched on and default basic blocks, which will be
+  // inserted into CaseBlock records, representing basic blocks in the binary
+  // search tree.
+  Value *SV = SI.getOperand(0);
+
+  // Push the initial CaseRec onto the worklist
+  CaseRecVector WorkList;
+  WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+
+  while (!WorkList.empty()) {
+    // Grab a record representing a case range to process off the worklist
+    CaseRec CR = WorkList.back();
+    WorkList.pop_back();
+
+    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+      continue;
+    
+    // If the range has few cases (three or fewer), emit a series of specific
+    // tests.
+    if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+      continue;
+    
+    // If the switch has more than 3 case values, is at least 40% dense, and
+    // the target supports indirect branches, then emit a jump table rather
+    // than lowering the switch to a binary tree of conditional branches.
+    if (handleJTSwitchCase(CR, WorkList, SV, Default))
+      continue;
+          
+    // Emit a binary tree. We need to pick a pivot, and push the left and right
+    // ranges onto the worklist. Leaves are handled by handleSmallSwitchRange()
+    // on later iterations.
+    handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+  }
+}
+
+
+void SelectionDAGLowering::visitSub(User &I) {
+  // -0.0 - X --> fneg
+  const Type *Ty = I.getType();
+  if (isa<VectorType>(Ty)) {
+    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+      const VectorType *DestTy = cast<VectorType>(I.getType());
+      const Type *ElTy = DestTy->getElementType();
+      if (ElTy->isFloatingPoint()) {
+        unsigned VL = DestTy->getNumElements();
+        std::vector<Constant*> NZ(VL, ConstantFP::get(ElTy, -0.0));
+        Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+        if (CV == CNZ) {
+          SDOperand Op2 = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
+          return;
+        }
+      }
+    }
+  }
+  if (Ty->isFloatingPoint()) {
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+      if (CFP->isExactlyValue(-0.0)) {
+        SDOperand Op2 = getValue(I.getOperand(1));
+        setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
+        return;
+      }
+  }
+
+  visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB);
+}
+
+void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
+  SDOperand Op1 = getValue(I.getOperand(0));
+  SDOperand Op2 = getValue(I.getOperand(1));
+  
+  setValue(&I, DAG.getNode(OpCode, Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
+  SDOperand Op1 = getValue(I.getOperand(0));
+  SDOperand Op2 = getValue(I.getOperand(1));
+  
+  if (MVT::getSizeInBits(TLI.getShiftAmountTy()) <
+      MVT::getSizeInBits(Op2.getValueType()))
+    Op2 = DAG.getNode(ISD::TRUNCATE, TLI.getShiftAmountTy(), Op2);
+  else if (TLI.getShiftAmountTy() > Op2.getValueType())
+    Op2 = DAG.getNode(ISD::ANY_EXTEND, TLI.getShiftAmountTy(), Op2);
+  
+  setValue(&I, DAG.getNode(Opcode, Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitICmp(User &I) {
+  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+  if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+    predicate = IC->getPredicate();
+  else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+    predicate = ICmpInst::Predicate(IC->getPredicate());
+  SDOperand Op1 = getValue(I.getOperand(0));
+  SDOperand Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Opcode;
+  switch (predicate) {
+    case ICmpInst::ICMP_EQ  : Opcode = ISD::SETEQ; break;
+    case ICmpInst::ICMP_NE  : Opcode = ISD::SETNE; break;
+    case ICmpInst::ICMP_UGT : Opcode = ISD::SETUGT; break;
+    case ICmpInst::ICMP_UGE : Opcode = ISD::SETUGE; break;
+    case ICmpInst::ICMP_ULT : Opcode = ISD::SETULT; break;
+    case ICmpInst::ICMP_ULE : Opcode = ISD::SETULE; break;
+    case ICmpInst::ICMP_SGT : Opcode = ISD::SETGT; break;
+    case ICmpInst::ICMP_SGE : Opcode = ISD::SETGE; break;
+    case ICmpInst::ICMP_SLT : Opcode = ISD::SETLT; break;
+    case ICmpInst::ICMP_SLE : Opcode = ISD::SETLE; break;
+    default:
+      assert(!"Invalid ICmp predicate value");
+      Opcode = ISD::SETEQ;
+      break;
+  }
+  setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Opcode));
+}
+
+void SelectionDAGLowering::visitFCmp(User &I) {
+  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+  if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+    predicate = FC->getPredicate();
+  else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+    predicate = FCmpInst::Predicate(FC->getPredicate());
+  SDOperand Op1 = getValue(I.getOperand(0));
+  SDOperand Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Condition, FOC, FPC;
+  switch (predicate) {
+    case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+    case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+    case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+    case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+    case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+    case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+    case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+    case FCmpInst::FCMP_ORD:   FOC = ISD::SETEQ; FPC = ISD::SETO;   break;
+    case FCmpInst::FCMP_UNO:   FOC = ISD::SETNE; FPC = ISD::SETUO;  break;
+    case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+    case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+    case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+    case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+    case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+    case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+    case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
+    default:
+      assert(!"Invalid FCmp predicate value");
+      FOC = FPC = ISD::SETFALSE;
+      break;
+  }
+  if (FiniteOnlyFPMath())
+    Condition = FOC;
+  else 
+    Condition = FPC;
+  setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitSelect(User &I) {
+  SDOperand Cond     = getValue(I.getOperand(0));
+  SDOperand TrueVal  = getValue(I.getOperand(1));
+  SDOperand FalseVal = getValue(I.getOperand(2));
+  setValue(&I, DAG.getNode(ISD::SELECT, TrueVal.getValueType(), Cond,
+                           TrueVal, FalseVal));
+}
+
+
+void SelectionDAGLowering::visitTrunc(User &I) {
+  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N));
+}
+
+void SelectionDAGLowering::visitZExt(User &I) {
+  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason, so there is nothing
+  // more to do.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitSExt(User &I) {
+  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason, so there is nothing
+  // more to do.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPTrunc(User &I) {
+  // FPTrunc is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPExt(User &I){ 
+  // FPExt is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToUI(User &I) { 
+  // FPToUI is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToSI(User &I) {
+  // FPToSI is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, DestVT, N));
+}
+
+void SelectionDAGLowering::visitUIToFP(User &I) { 
+  // UIToFP is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, DestVT, N));
+}
+
+void SelectionDAGLowering::visitSIToFP(User &I){ 
+  // SIToFP is never a no-op cast, no need to check
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, DestVT, N));
+}
+
+void SelectionDAGLowering::visitPtrToInt(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType SrcVT = N.getValueType();
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  SDOperand Result;
+  if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT))
+    Result = DAG.getNode(ISD::TRUNCATE, DestVT, N);
+  else 
+    // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+    Result = DAG.getNode(ISD::ZERO_EXTEND, DestVT, N);
+  setValue(&I, Result);
+}
+
+void SelectionDAGLowering::visitIntToPtr(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType SrcVT = N.getValueType();
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT))
+    setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N));
+  else 
+    // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+    setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitBitCast(User &I) { 
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+
+  // BitCast assures us that the source and destination are the same size, so
+  // this is either a BIT_CONVERT or a no-op.
+  if (DestVT != N.getValueType())
+    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, DestVT, N)); // convert types
+  else
+    setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGLowering::visitInsertElement(User &I) {
+  SDOperand InVec = getValue(I.getOperand(0));
+  SDOperand InVal = getValue(I.getOperand(1));
+  SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(),
+                                getValue(I.getOperand(2)));
+
+  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT,
+                           TLI.getValueType(I.getType()),
+                           InVec, InVal, InIdx));
+}
+
+void SelectionDAGLowering::visitExtractElement(User &I) {
+  SDOperand InVec = getValue(I.getOperand(0));
+  SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(),
+                                getValue(I.getOperand(1)));
+  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                           TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+void SelectionDAGLowering::visitShuffleVector(User &I) {
+  SDOperand V1   = getValue(I.getOperand(0));
+  SDOperand V2   = getValue(I.getOperand(1));
+  SDOperand Mask = getValue(I.getOperand(2));
+
+  setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE,
+                           TLI.getValueType(I.getType()),
+                           V1, V2, Mask));
+}
+
+
+void SelectionDAGLowering::visitGetElementPtr(User &I) {
+  SDOperand N = getValue(I.getOperand(0));
+  const Type *Ty = I.getOperand(0)->getType();
+
+  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+       OI != E; ++OI) {
+    Value *Idx = *OI;
+    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      if (Field) {
+        // N = N + Offset
+        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+        N = DAG.getNode(ISD::ADD, N.getValueType(), N,
+                        getIntPtrConstant(Offset));
+      }
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+
+      // If this is a constant subscript, handle it quickly.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->getZExtValue() == 0) continue;
+        uint64_t Offs = 
+            TD->getTypeSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+        N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs));
+        continue;
+      }
+      
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = TD->getTypeSize(Ty);
+      SDOperand IdxN = getValue(Idx);
+
+      // If the index is smaller or larger than intptr_t, truncate or extend
+      // it.
+      if (IdxN.getValueType() < N.getValueType()) {
+        IdxN = DAG.getNode(ISD::SIGN_EXTEND, N.getValueType(), IdxN);
+      } else if (IdxN.getValueType() > N.getValueType())
+        IdxN = DAG.getNode(ISD::TRUNCATE, N.getValueType(), IdxN);
+
+      // If this is a multiply by a power of two, turn it into a shl
+      // immediately.  This is a very common case.
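+      // E.g. indexing an array of i64 (ElementSize == 8) becomes IdxN << 3.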
+      if (isPowerOf2_64(ElementSize)) {
+        unsigned Amt = Log2_64(ElementSize);
+        IdxN = DAG.getNode(ISD::SHL, N.getValueType(), IdxN,
+                           DAG.getConstant(Amt, TLI.getShiftAmountTy()));
+        N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+        continue;
+      }
+      
+      SDOperand Scale = getIntPtrConstant(ElementSize);
+      IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
+      N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+    }
+  }
+  setValue(&I, N);
+}
+
+void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
+  // If this is a fixed sized alloca in the entry block of the function,
+  // allocate it statically on the stack.
+  if (FuncInfo.StaticAllocaMap.count(&I))
+    return;   // getValue will auto-populate this.
+
+  const Type *Ty = I.getAllocatedType();
+  uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+  unsigned Align =
+    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+             I.getAlignment());
+
+  SDOperand AllocSize = getValue(I.getArraySize());
+  MVT::ValueType IntPtr = TLI.getPointerTy();
+  if (IntPtr < AllocSize.getValueType())
+    AllocSize = DAG.getNode(ISD::TRUNCATE, IntPtr, AllocSize);
+  else if (IntPtr > AllocSize.getValueType())
+    AllocSize = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, AllocSize);
+
+  AllocSize = DAG.getNode(ISD::MUL, IntPtr, AllocSize,
+                          getIntPtrConstant(TySize));
+
+  // Handle alignment.  If the requested alignment is less than the stack
+  // alignment, ignore it and round the size of the allocation up to the stack
+  // alignment size.  If the requested alignment is greater than or equal to
+  // the stack alignment, we note it in the DYNAMIC_STACKALLOC node.
+  unsigned StackAlign =
+    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (Align < StackAlign) {
+    Align = 0;
+    // Add SA-1 to the size.
+    AllocSize = DAG.getNode(ISD::ADD, AllocSize.getValueType(), AllocSize,
+                            getIntPtrConstant(StackAlign-1));
+    // Mask out the low bits for alignment purposes.
+    AllocSize = DAG.getNode(ISD::AND, AllocSize.getValueType(), AllocSize,
+                            getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+  }
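+  // E.g. with a 16-byte stack alignment, a 20-byte allocation is rounded to
+  // (20 + 15) & ~15 == 32 bytes.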
+
+  SDOperand Ops[] = { getRoot(), AllocSize, getIntPtrConstant(Align) };
+  const MVT::ValueType *VTs = DAG.getNodeValueTypes(AllocSize.getValueType(),
+                                                    MVT::Other);
+  SDOperand DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, VTs, 2, Ops, 3);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  // Inform the Frame Information that we have just allocated a variable-sized
+  // object.
+  CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGLowering::visitLoad(LoadInst &I) {
+  SDOperand Ptr = getValue(I.getOperand(0));
+
+  SDOperand Root;
+  if (I.isVolatile())
+    Root = getRoot();
+  else {
+    // Do not serialize non-volatile loads against each other.
+    Root = DAG.getRoot();
+  }
+
+  setValue(&I, getLoadFrom(I.getType(), Ptr, I.getOperand(0),
+                           Root, I.isVolatile(), I.getAlignment()));
+}
+
+SDOperand SelectionDAGLowering::getLoadFrom(const Type *Ty, SDOperand Ptr,
+                                            const Value *SV, SDOperand Root,
+                                            bool isVolatile, 
+                                            unsigned Alignment) {
+  SDOperand L =
+    DAG.getLoad(TLI.getValueType(Ty), Root, Ptr, SV, 0, 
+                isVolatile, Alignment);
+
+  if (isVolatile)
+    DAG.setRoot(L.getValue(1));
+  else
+    PendingLoads.push_back(L.getValue(1));
+  
+  return L;
+}
+
+
+void SelectionDAGLowering::visitStore(StoreInst &I) {
+  Value *SrcV = I.getOperand(0);
+  SDOperand Src = getValue(SrcV);
+  SDOperand Ptr = getValue(I.getOperand(1));
+  DAG.setRoot(DAG.getStore(getRoot(), Src, Ptr, I.getOperand(1), 0,
+                           I.isVolatile(), I.getAlignment()));
+}
+
+/// IntrinsicCannotAccessMemory - Return true if the specified intrinsic cannot
+/// access memory and has no other side effects at all.
+static bool IntrinsicCannotAccessMemory(unsigned IntrinsicID) {
+#define GET_NO_MEMORY_INTRINSICS
+#include "llvm/Intrinsics.gen"
+#undef GET_NO_MEMORY_INTRINSICS
+  return false;
+}
+
+/// IntrinsicOnlyReadsMemory - Return true if the specified intrinsic has no
+/// side effects or only reads memory.
+static bool IntrinsicOnlyReadsMemory(unsigned IntrinsicID) {
+#define GET_SIDE_EFFECT_INFO
+#include "llvm/Intrinsics.gen"
+#undef GET_SIDE_EFFECT_INFO
+  return false;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, 
+                                                unsigned Intrinsic) {
+  bool HasChain = !IntrinsicCannotAccessMemory(Intrinsic);
+  bool OnlyLoad = HasChain && IntrinsicOnlyReadsMemory(Intrinsic);
+  
+  // Build the operand list.
+  SmallVector<SDOperand, 8> Ops;
+  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
+    if (OnlyLoad) {
+      // We don't need to serialize loads against other loads.
+      Ops.push_back(DAG.getRoot());
+    } else { 
+      Ops.push_back(getRoot());
+    }
+  }
+  
+  // Add the intrinsic ID as an integer operand.
+  Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+  // Add all operands of the call to the operand list.
+  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+    SDOperand Op = getValue(I.getOperand(i));
+    assert(TLI.isTypeLegal(Op.getValueType()) &&
+           "Intrinsic uses a non-legal type?");
+    Ops.push_back(Op);
+  }
+
+  std::vector<MVT::ValueType> VTs;
+  if (I.getType() != Type::VoidTy) {
+    MVT::ValueType VT = TLI.getValueType(I.getType());
+    if (MVT::isVector(VT)) {
+      const VectorType *DestTy = cast<VectorType>(I.getType());
+      MVT::ValueType EltVT = TLI.getValueType(DestTy->getElementType());
+      
+      VT = MVT::getVectorType(EltVT, DestTy->getNumElements());
+      assert(VT != MVT::Other && "Intrinsic uses a non-legal type?");
+    }
+    
+    assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?");
+    VTs.push_back(VT);
+  }
+  if (HasChain)
+    VTs.push_back(MVT::Other);
+
+  const MVT::ValueType *VTList = DAG.getNodeValueTypes(VTs);
+
+  // Create the node.
+  SDOperand Result;
+  if (!HasChain)
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VTList, VTs.size(),
+                         &Ops[0], Ops.size());
+  else if (I.getType() != Type::VoidTy)
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, VTList, VTs.size(),
+                         &Ops[0], Ops.size());
+  else
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, VTList, VTs.size(),
+                         &Ops[0], Ops.size());
+
+  if (HasChain) {
+    SDOperand Chain = Result.getValue(Result.Val->getNumValues()-1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+  if (I.getType() != Type::VoidTy) {
+    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+      MVT::ValueType VT = TLI.getValueType(PTy);
+      Result = DAG.getNode(ISD::BIT_CONVERT, VT, Result);
+    } 
+    setValue(&I, Result);
+  }
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+static GlobalVariable *ExtractTypeInfo(Value *V) {
+  V = IntrinsicInst::StripPointerCasts(V);
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+  assert((GV || isa<ConstantPointerNull>(V)) &&
+         "TypeInfo must be a global variable or NULL");
+  return GV;
+}
+
+/// addCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+static void addCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+                         MachineBasicBlock *MBB) {
+  // Inform the MachineModuleInfo of the personality for this landing pad.
+  ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+  assert(CE->getOpcode() == Instruction::BitCast &&
+         isa<Function>(CE->getOperand(0)) &&
+         "Personality should be a function");
+  MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+  // Gather all the type infos for this landing pad and pass them along to
+  // MachineModuleInfo.
+  std::vector<GlobalVariable *> TyInfo;
+  unsigned N = I.getNumOperands();
+
+  for (unsigned i = N - 1; i > 2; --i) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+      unsigned FilterLength = CI->getZExtValue();
+      unsigned FirstCatch = i + FilterLength + 1;
+      assert (FirstCatch <= N && "Invalid filter length");
+
+      if (FirstCatch < N) {
+        TyInfo.reserve(N - FirstCatch);
+        for (unsigned j = FirstCatch; j < N; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+        MMI->addCatchTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      TyInfo.reserve(FilterLength);
+      for (unsigned j = i + 1; j < FirstCatch; ++j)
+        TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+      MMI->addFilterTypeInfo(MBB, TyInfo);
+      TyInfo.clear();
+
+      N = i;
+    }
+  }
+
+  if (N > 3) {
+    TyInfo.reserve(N - 3);
+    for (unsigned j = 3; j < N; ++j)
+      TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+    MMI->addCatchTypeInfo(MBB, TyInfo);
+  }
+}
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function.
+/// If we want to emit this as a call to a named external function, return the
+/// name; otherwise lower it and return null.
+const char *
+SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+  switch (Intrinsic) {
+  default:
+    // By default, turn this into a target intrinsic node.
+    visitTargetIntrinsic(I, Intrinsic);
+    return 0;
+  case Intrinsic::vastart:  visitVAStart(I); return 0;
+  case Intrinsic::vaend:    visitVAEnd(I); return 0;
+  case Intrinsic::vacopy:   visitVACopy(I); return 0;
+  case Intrinsic::returnaddress:
+    setValue(&I, DAG.getNode(ISD::RETURNADDR, TLI.getPointerTy(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::frameaddress:
+    setValue(&I, DAG.getNode(ISD::FRAMEADDR, TLI.getPointerTy(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::setjmp:
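+    // If the target does not use an underscore prefix, the pointer arithmetic
+    // skips the leading '_': "_setjmp" + 1 == "setjmp".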
+    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+  case Intrinsic::longjmp:
+    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+  case Intrinsic::memcpy_i32:
+  case Intrinsic::memcpy_i64:
+    visitMemIntrinsic(I, ISD::MEMCPY);
+    return 0;
+  case Intrinsic::memset_i32:
+  case Intrinsic::memset_i64:
+    visitMemIntrinsic(I, ISD::MEMSET);
+    return 0;
+  case Intrinsic::memmove_i32:
+  case Intrinsic::memmove_i64:
+    visitMemIntrinsic(I, ISD::MEMMOVE);
+    return 0;
+    
+  case Intrinsic::dbg_stoppoint: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
+    if (MMI && SPI.getContext() && MMI->Verify(SPI.getContext())) {
+      SDOperand Ops[5];
+
+      Ops[0] = getRoot();
+      Ops[1] = getValue(SPI.getLineValue());
+      Ops[2] = getValue(SPI.getColumnValue());
+
+      DebugInfoDesc *DD = MMI->getDescFor(SPI.getContext());
+      assert(DD && "Not a debug information descriptor");
+      CompileUnitDesc *CompileUnit = cast<CompileUnitDesc>(DD);
+      
+      Ops[3] = DAG.getString(CompileUnit->getFileName());
+      Ops[4] = DAG.getString(CompileUnit->getDirectory());
+      
+      DAG.setRoot(DAG.getNode(ISD::LOCATION, MVT::Other, Ops, 5));
+    }
+
+    return 0;
+  }
+  case Intrinsic::dbg_region_start: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
+    if (MMI && RSI.getContext() && MMI->Verify(RSI.getContext())) {
+      unsigned LabelID = MMI->RecordRegionStart(RSI.getContext());
+      DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(),
+                              DAG.getConstant(LabelID, MVT::i32)));
+    }
+
+    return 0;
+  }
+  case Intrinsic::dbg_region_end: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
+    if (MMI && REI.getContext() && MMI->Verify(REI.getContext())) {
+      unsigned LabelID = MMI->RecordRegionEnd(REI.getContext());
+      DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other,
+                              getRoot(), DAG.getConstant(LabelID, MVT::i32)));
+    }
+
+    return 0;
+  }
+  case Intrinsic::dbg_func_start: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
+    if (MMI && FSI.getSubprogram() &&
+        MMI->Verify(FSI.getSubprogram())) {
+      unsigned LabelID = MMI->RecordRegionStart(FSI.getSubprogram());
+      DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other,
+                  getRoot(), DAG.getConstant(LabelID, MVT::i32)));
+    }
+
+    return 0;
+  }
+  case Intrinsic::dbg_declare: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+    if (MMI && DI.getVariable() && MMI->Verify(DI.getVariable())) {
+      SDOperand AddressOp  = getValue(DI.getAddress());
+      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddressOp))
+        MMI->RecordVariable(DI.getVariable(), FI->getIndex());
+    }
+
+    return 0;
+  }
+    
+  case Intrinsic::eh_exception: {
+    if (ExceptionHandling) {
+      if (!CurMBB->isLandingPad()) {
+        // FIXME: Mark exception register as live in.  Hack for PR1508.
+        unsigned Reg = TLI.getExceptionAddressRegister();
+        if (Reg) CurMBB->addLiveIn(Reg);
+      }
+      // Insert the EXCEPTIONADDR instruction.
+      SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+      SDOperand Ops[1];
+      Ops[0] = DAG.getRoot();
+      SDOperand Op = DAG.getNode(ISD::EXCEPTIONADDR, VTs, Ops, 1);
+      setValue(&I, Op);
+      DAG.setRoot(Op.getValue(1));
+    } else {
+      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+    }
+    return 0;
+  }
+
+  case Intrinsic::eh_selector:{
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+    if (ExceptionHandling && MMI) {
+      if (CurMBB->isLandingPad())
+        addCatchInfo(I, MMI, CurMBB);
+      else {
+#ifndef NDEBUG
+        FuncInfo.CatchInfoLost.insert(&I);
+#endif
+        // FIXME: Mark exception selector register as live in.  Hack for PR1508.
+        unsigned Reg = TLI.getExceptionSelectorRegister();
+        if (Reg) CurMBB->addLiveIn(Reg);
+      }
+
+      // Insert the EHSELECTION instruction.
+      SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+      SDOperand Ops[2];
+      Ops[0] = getValue(I.getOperand(1));
+      Ops[1] = getRoot();
+      SDOperand Op = DAG.getNode(ISD::EHSELECTION, VTs, Ops, 2);
+      setValue(&I, Op);
+      DAG.setRoot(Op.getValue(1));
+    } else {
+      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+    }
+    
+    return 0;
+  }
+  
+  case Intrinsic::eh_typeid_for: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    
+    if (MMI) {
+      // Find the type id for the given typeinfo.
+      GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+
+      unsigned TypeID = MMI->getTypeIDFor(GV);
+      setValue(&I, DAG.getConstant(TypeID, MVT::i32));
+    } else {
+      // Return something different from what eh_selector returns.
+      setValue(&I, DAG.getConstant(1, MVT::i32));
+    }
+
+    return 0;
+  }
+
+  case Intrinsic::eh_return: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+    if (MMI && ExceptionHandling) {
+      MMI->setCallsEHReturn(true);
+      DAG.setRoot(DAG.getNode(ISD::EH_RETURN,
+                              MVT::Other,
+                              getRoot(),
+                              getValue(I.getOperand(1)),
+                              getValue(I.getOperand(2))));
+    } else {
+      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+    }
+
+    return 0;
+  }
+
+  case Intrinsic::eh_unwind_init: {
+    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+      MMI->setCallsUnwindInit(true);
+    }
+
+    return 0;
+  }
+
+  case Intrinsic::eh_dwarf_cfa: {
+    if (ExceptionHandling) {
+      MVT::ValueType VT = getValue(I.getOperand(1)).getValueType();
+      SDOperand Offset = DAG.getNode(ISD::ADD,
+                                     TLI.getPointerTy(),
+                                     DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET,
+                                                 VT),
+                                     getValue(I.getOperand(1)));
+      setValue(&I, DAG.getNode(ISD::ADD,
+                               TLI.getPointerTy(),
+                               DAG.getNode(ISD::FRAMEADDR,
+                                           TLI.getPointerTy(),
+                                           DAG.getConstant(0,
+                                                           TLI.getPointerTy())),
+                               Offset));
+    } else {
+      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+    }
+
+    return 0;
+  }
+
+  case Intrinsic::sqrt_f32:
+  case Intrinsic::sqrt_f64:
+    setValue(&I, DAG.getNode(ISD::FSQRT,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::powi_f32:
+  case Intrinsic::powi_f64:
+    setValue(&I, DAG.getNode(ISD::FPOWI,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1)),
+                             getValue(I.getOperand(2))));
+    return 0;
+  case Intrinsic::pcmarker: {
+    SDOperand Tmp = getValue(I.getOperand(1));
+    DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
+    return 0;
+  }
+  case Intrinsic::readcyclecounter: {
+    SDOperand Op = getRoot();
+    SDOperand Tmp = DAG.getNode(ISD::READCYCLECOUNTER,
+                                DAG.getNodeValueTypes(MVT::i64, MVT::Other), 2,
+                                &Op, 1);
+    setValue(&I, Tmp);
+    DAG.setRoot(Tmp.getValue(1));
+    return 0;
+  }
+  case Intrinsic::part_select: {
+    // Currently not implemented: just abort
+    assert(0 && "part_select intrinsic not implemented");
+    abort();
+  }
+  case Intrinsic::part_set: {
+    // Currently not implemented: just abort
+    assert(0 && "part_set intrinsic not implemented");
+    abort();
+  }
+  case Intrinsic::bswap:
+    setValue(&I, DAG.getNode(ISD::BSWAP,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::cttz: {
+    SDOperand Arg = getValue(I.getOperand(1));
+    MVT::ValueType Ty = Arg.getValueType();
+    SDOperand result = DAG.getNode(ISD::CTTZ, Ty, Arg);
+    if (Ty < MVT::i32)
+      result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
+    else if (Ty > MVT::i32)
+      result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
+    setValue(&I, result);
+    return 0;
+  }
+  case Intrinsic::ctlz: {
+    SDOperand Arg = getValue(I.getOperand(1));
+    MVT::ValueType Ty = Arg.getValueType();
+    SDOperand result = DAG.getNode(ISD::CTLZ, Ty, Arg);
+    if (Ty < MVT::i32)
+      result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
+    else if (Ty > MVT::i32)
+      result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
+    setValue(&I, result);
+    return 0;
+  }
+  case Intrinsic::ctpop: {
+    SDOperand Arg = getValue(I.getOperand(1));
+    MVT::ValueType Ty = Arg.getValueType();
+    SDOperand result = DAG.getNode(ISD::CTPOP, Ty, Arg);
+    if (Ty < MVT::i32)
+      result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
+    else if (Ty > MVT::i32)
+      result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
+    setValue(&I, result);
+    return 0;
+  }
+  case Intrinsic::stacksave: {
+    SDOperand Op = getRoot();
+    SDOperand Tmp = DAG.getNode(ISD::STACKSAVE,
+              DAG.getNodeValueTypes(TLI.getPointerTy(), MVT::Other), 2, &Op, 1);
+    setValue(&I, Tmp);
+    DAG.setRoot(Tmp.getValue(1));
+    return 0;
+  }
+  case Intrinsic::stackrestore: {
+    SDOperand Tmp = getValue(I.getOperand(1));
+    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, MVT::Other, getRoot(), Tmp));
+    return 0;
+  }
+  case Intrinsic::prefetch:
+    // FIXME: Currently discarding prefetches.
+    return 0;
+  
+  case Intrinsic::var_annotation:
+    // Discard annotate attributes
+    return 0;
+  }
+}
+
+
+void SelectionDAGLowering::LowerCallTo(Instruction &I,
+                                       const Type *CalledValueTy,
+                                       unsigned CallingConv,
+                                       bool IsTailCall,
+                                       SDOperand Callee, unsigned OpIdx,
+                                       MachineBasicBlock *LandingPad) {
+  const PointerType *PT = cast<PointerType>(CalledValueTy);
+  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  const ParamAttrsList *Attrs = FTy->getParamAttrs();
+  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+  unsigned BeginLabel = 0, EndLabel = 0;
+    
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Args.reserve(I.getNumOperands());
+  for (unsigned i = OpIdx, e = I.getNumOperands(); i != e; ++i) {
+    Value *Arg = I.getOperand(i);
+    SDOperand ArgNode = getValue(Arg);
+    Entry.Node = ArgNode; Entry.Ty = Arg->getType();
+
+    unsigned attrInd = i - OpIdx + 1;
+    Entry.isSExt  = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::SExt);
+    Entry.isZExt  = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::ZExt);
+    Entry.isInReg = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::InReg);
+    Entry.isSRet  = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::StructRet);
+    Args.push_back(Entry);
+  }
+
+  if (ExceptionHandling && MMI) {
+    // Insert a label before the invoke call to mark the try range.  This can be
+    // used to detect deletion of the invoke via the MachineModuleInfo.
+    BeginLabel = MMI->NextLabelID();
+    DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(),
+                            DAG.getConstant(BeginLabel, MVT::i32)));
+  }
+  
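+  // Note: parameter attribute index 0 describes the return value, so the
+  // paramHasAttr(0, SExt) query below asks whether the call result should be
+  // treated as sign extended.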
+  std::pair<SDOperand,SDOperand> Result =
+    TLI.LowerCallTo(getRoot(), I.getType(), 
+                    Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt),
+                    FTy->isVarArg(), CallingConv, IsTailCall, 
+                    Callee, Args, DAG);
+  if (I.getType() != Type::VoidTy)
+    setValue(&I, Result.first);
+  DAG.setRoot(Result.second);
+
+  if (ExceptionHandling && MMI) {
+    // Insert a label at the end of the invoke call to mark the try range.  This
+    // can be used to detect deletion of the invoke via the MachineModuleInfo.
+    EndLabel = MMI->NextLabelID();
+    DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(),
+                            DAG.getConstant(EndLabel, MVT::i32)));
+
+    // Inform MachineModuleInfo of range.    
+    MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
+  }
+}
+
+
+void SelectionDAGLowering::visitCall(CallInst &I) {
+  const char *RenameFn = 0;
+  if (Function *F = I.getCalledFunction()) {
+    if (F->isDeclaration())
+      if (unsigned IID = F->getIntrinsicID()) {
+        RenameFn = visitIntrinsicCall(I, IID);
+        if (!RenameFn)
+          return;
+      } else {    // Not an LLVM intrinsic.
+        const std::string &Name = F->getName();
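+        // Recognize a handful of libm functions by name and lower them
+        // directly to DAG nodes when the operand and result types match;
+        // e.g. a well-formed call to sinf becomes a single ISD::FSIN node
+        // rather than a libcall.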
+        if (Name[0] == 'c' && (Name == "copysign" || Name == "copysignf")) {
+          if (I.getNumOperands() == 3 &&   // Basic sanity checks.
+              I.getOperand(1)->getType()->isFloatingPoint() &&
+              I.getType() == I.getOperand(1)->getType() &&
+              I.getType() == I.getOperand(2)->getType()) {
+            SDOperand LHS = getValue(I.getOperand(1));
+            SDOperand RHS = getValue(I.getOperand(2));
+            setValue(&I, DAG.getNode(ISD::FCOPYSIGN, LHS.getValueType(),
+                                     LHS, RHS));
+            return;
+          }
+        } else if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) {
+          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+              I.getOperand(1)->getType()->isFloatingPoint() &&
+              I.getType() == I.getOperand(1)->getType()) {
+            SDOperand Tmp = getValue(I.getOperand(1));
+            setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp));
+            return;
+          }
+        } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) {
+          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+              I.getOperand(1)->getType()->isFloatingPoint() &&
+              I.getType() == I.getOperand(1)->getType()) {
+            SDOperand Tmp = getValue(I.getOperand(1));
+            setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp));
+            return;
+          }
+        } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) {
+          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+              I.getOperand(1)->getType()->isFloatingPoint() &&
+              I.getType() == I.getOperand(1)->getType()) {
+            SDOperand Tmp = getValue(I.getOperand(1));
+            setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp));
+            return;
+          }
+        }
+      }
+  } else if (isa<InlineAsm>(I.getOperand(0))) {
+    visitInlineAsm(I);
+    return;
+  }
+
+  SDOperand Callee;
+  if (!RenameFn)
+    Callee = getValue(I.getOperand(0));
+  else
+    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  LowerCallTo(I, I.getCalledValue()->getType(),
+              I.getCallingConv(),
+              I.isTailCall(),
+              Callee,
+              1);
+}
+
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from the
+/// registers in this object and return the result as a ValueVT value.  This
+/// uses Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDOperand RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+                                        SDOperand &Chain, SDOperand *Flag)const{
+  // Copy the legal parts from the registers.
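+  // (For example, on a 32-bit target an i64 value assigned to two i32
+  // registers is read back with two CopyFromReg nodes, threaded through
+  // Chain and Flag, and reassembled into one i64 by getCopyFromParts.)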
+  unsigned NumParts = Regs.size();
+  SmallVector<SDOperand, 8> Parts(NumParts);
+  for (unsigned i = 0; i != NumParts; ++i) {
+    SDOperand Part = Flag ?
+                     DAG.getCopyFromReg(Chain, Regs[i], RegVT, *Flag) :
+                     DAG.getCopyFromReg(Chain, Regs[i], RegVT);
+    Chain = Part.getValue(1);
+    if (Flag)
+      *Flag = Part.getValue(2);
+    Parts[i] = Part;
+  }
+  
+  // Assemble the legal parts into the final value.
+  return getCopyFromParts(DAG, &Parts[0], NumParts, RegVT, ValueVT);
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object.  This uses 
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
+                                 SDOperand &Chain, SDOperand *Flag) const {
+  // Get the list of the values's legal parts.
+  unsigned NumParts = Regs.size();
+  SmallVector<SDOperand, 8> Parts(NumParts);
+  getCopyToParts(DAG, Val, &Parts[0], NumParts, RegVT);
+
+  // Copy the parts into the registers.
+  for (unsigned i = 0; i != NumParts; ++i) {
+    SDOperand Part = Flag ?
+                     DAG.getCopyToReg(Chain, Regs[i], Parts[i], *Flag) :
+                     DAG.getCopyToReg(Chain, Regs[i], Parts[i]);
+    Chain = Part.getValue(0);
+    if (Flag)
+      *Flag = Part.getValue(1);
+  }
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list.  This adds the code marker and includes the number of 
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
+                                        std::vector<SDOperand> &Ops) const {
+  MVT::ValueType IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
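+  // The low 3 bits of the flag word encode the operand code (e.g. 1 for
+  // REGUSE, 2 for REGDEF, 3 for IMM, 4 for MEM, per the markers used in this
+  // file); the remaining bits hold the count of operands that follow it.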
+  Ops.push_back(DAG.getTargetConstant(Code | (Regs.size() << 3), IntPtrTy));
+  for (unsigned i = 0, e = Regs.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(Regs[i], RegVT));
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate, 
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register.  Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+                      const TargetLowering &TLI, const MRegisterInfo *MRI) {
+  MVT::ValueType FoundVT = MVT::Other;
+  const TargetRegisterClass *FoundRC = 0;
+  for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(),
+       E = MRI->regclass_end(); RCI != E; ++RCI) {
+    MVT::ValueType ThisVT = MVT::Other;
+
+    const TargetRegisterClass *RC = *RCI;
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (TLI.isTypeLegal(*I)) {
+        // If we have already found this register in a different register class,
+        // choose the one with the largest VT specified.  For example, on
+        // PowerPC, we favor f64 register classes over f32.
+        if (FoundVT == MVT::Other || 
+            MVT::getSizeInBits(FoundVT) < MVT::getSizeInBits(*I)) {
+          ThisVT = *I;
+          break;
+        }
+      }
+    }
+    
+    if (ThisVT == MVT::Other) continue;
+    
+    // NOTE: This isn't ideal.  In particular, this might allocate the
+    // frame pointer in functions that need it (it hasn't been taken out of
+    // the allocation order yet, because no variable-sized allocation has
+    // been seen).  This is a slight code pessimization, but should still work.
+    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+         E = RC->allocation_order_end(MF); I != E; ++I)
+      if (*I == Reg) {
+        // We found a matching register class.  Keep looking at others in case
+        // we find one with larger registers that this physreg is also in.
+        FoundRC = RC;
+        FoundVT = ThisVT;
+        break;
+      }
+  }
+  return FoundRC;
+}    
+
+
+namespace {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
+  /// ConstraintCode - This contains the actual string for the code, like "m".
+  std::string ConstraintCode;
+
+  /// ConstraintType - Information about the constraint code, e.g. Register,
+  /// RegisterClass, Memory, Other, Unknown.
+  TargetLowering::ConstraintType ConstraintType;
+  
+  /// CallOperand/CallOperandVal - If this is the result output operand or a
+  /// clobber, this is null; otherwise it is the incoming operand to the
+  /// CallInst.  This gets modified as the asm is processed.
+  SDOperand CallOperand;
+  Value *CallOperandVal;
+  
+  /// ConstraintVT - The ValueType for the operand value.
+  MVT::ValueType ConstraintVT;
+  
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+  
+  AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+    : InlineAsm::ConstraintInfo(info), 
+      ConstraintType(TargetLowering::C_Unknown),
+      CallOperand(0,0), CallOperandVal(0), ConstraintVT(MVT::Other) {
+  }
+  
+  void ComputeConstraintToUse(const TargetLowering &TLI);
+  
+  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+  /// busy in OutputRegs/InputRegs.
+  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+                         std::set<unsigned> &OutputRegs, 
+                         std::set<unsigned> &InputRegs) const {
+    if (isOutReg)
+      OutputRegs.insert(AssignedRegs.Regs.begin(), AssignedRegs.Regs.end());
+    if (isInReg)
+      InputRegs.insert(AssignedRegs.Regs.begin(), AssignedRegs.Regs.end());
+  }
+};
+} // end anon namespace.
+
+/// getConstraintGenerality - Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+  switch (CT) {
+    default: assert(0 && "Unknown constraint type!");
+    case TargetLowering::C_Other:
+    case TargetLowering::C_Unknown:
+      return 0;
+    case TargetLowering::C_Register:
+      return 1;
+    case TargetLowering::C_RegisterClass:
+      return 2;
+    case TargetLowering::C_Memory:
+      return 3;
+  }
+}
+
+void AsmOperandInfo::ComputeConstraintToUse(const TargetLowering &TLI) {
+  assert(!Codes.empty() && "Must have at least one constraint");
+  
+  std::string *Current = &Codes[0];
+  TargetLowering::ConstraintType CurType = TLI.getConstraintType(*Current);
+  if (Codes.size() == 1) {   // Single-letter constraints ('r') are very common.
+    ConstraintCode = *Current;
+    ConstraintType = CurType;
+    return;
+  }
+  
+  unsigned CurGenerality = getConstraintGenerality(CurType);
+  
+  // If we have multiple constraints, try to pick the most general one ahead
+  // of time.  This isn't a wonderful solution, but handles common cases.
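+  // For example, given the multi-letter constraint "imr", 'm' (C_Memory,
+  // generality 3) would be chosen over 'r' (C_RegisterClass, 2) and 'i'
+  // (C_Other, 0), assuming the target classifies those letters as usual.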
+  for (unsigned j = 1, e = Codes.size(); j != e; ++j) {
+    TargetLowering::ConstraintType ThisType = TLI.getConstraintType(Codes[j]);
+    unsigned ThisGenerality = getConstraintGenerality(ThisType);
+    if (ThisGenerality > CurGenerality) {
+      // This constraint letter is more general than the previous one,
+      // use it.
+      CurType = ThisType;
+      Current = &Codes[j];
+      CurGenerality = ThisGenerality;
+    }
+  }
+  
+  ConstraintCode = *Current;
+  ConstraintType = CurType;
+}
+
+
+void SelectionDAGLowering::
+GetRegistersForValue(AsmOperandInfo &OpInfo, bool HasEarlyClobber,
+                     std::set<unsigned> &OutputRegs, 
+                     std::set<unsigned> &InputRegs) {
+  // Compute whether this value requires an input register, an output register,
+  // or both.
+  bool isOutReg = false;
+  bool isInReg = false;
+  switch (OpInfo.Type) {
+  case InlineAsm::isOutput:
+    isOutReg = true;
+    
+    // If this is an early-clobber output, or if there is an input
+    // constraint that matches this, we need to reserve the input register
+    // so no other inputs allocate to it.
+    isInReg = OpInfo.isEarlyClobber || OpInfo.hasMatchingInput;
+    break;
+  case InlineAsm::isInput:
+    isInReg = true;
+    isOutReg = false;
+    break;
+  case InlineAsm::isClobber:
+    isOutReg = true;
+    isInReg = true;
+    break;
+  }
+  
+  
+  MachineFunction &MF = DAG.getMachineFunction();
+  std::vector<unsigned> Regs;
+  
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  std::pair<unsigned, const TargetRegisterClass*> PhysReg = 
+    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                     OpInfo.ConstraintVT);
+
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other)
+    NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT);
+  MVT::ValueType RegVT;
+  MVT::ValueType ValueVT = OpInfo.ConstraintVT;
+  
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+  if (PhysReg.first) {
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = *PhysReg.second->vt_begin();
+    
+    // Get the actual register value type.  This is important, because the user
+    // may have asked for (e.g.) the AX register in i32 type.  We need to
+    // remember that AX is actually i16 to get the right extension.
+    RegVT = *PhysReg.second->vt_begin();
+    
+    // This is an explicit reference to a physical register.
+    Regs.push_back(PhysReg.first);
+
+    // If this is an expanded reference, add the rest of the regs to Regs.
+    if (NumRegs != 1) {
+      TargetRegisterClass::iterator I = PhysReg.second->begin();
+      TargetRegisterClass::iterator E = PhysReg.second->end();
+      for (; I != E && *I != PhysReg.first; ++I)
+        ;  // Scan for the first register.
+      assert(I != E && "Didn't find reg!");
+      
+      // Already added the first reg.
+      --NumRegs; ++I;
+      for (; NumRegs; --NumRegs, ++I) {
+        assert(I != E && "Ran out of registers to allocate!");
+        Regs.push_back(*I);
+      }
+    }
+    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+    return;
+  }
+  
+  // Otherwise, if this was a reference to an LLVM register class, create vregs
+  // for this reference.
+  std::vector<unsigned> RegClassRegs;
+  const TargetRegisterClass *RC = PhysReg.second;
+  if (RC) {
+    // If this is an early clobber or tied register, our regalloc doesn't know
+    // how to maintain the constraint.  If it isn't, go ahead and create vreg
+    // and let the regalloc do the right thing.
+    if (!OpInfo.hasMatchingInput && !OpInfo.isEarlyClobber &&
+        // If there is some other early clobber and this is an input register,
+        // then we are forced to pre-allocate the input reg so it doesn't
+        // conflict with the earlyclobber.
+        !(OpInfo.Type == InlineAsm::isInput && HasEarlyClobber)) {
+      RegVT = *PhysReg.second->vt_begin();
+      
+      if (OpInfo.ConstraintVT == MVT::Other)
+        ValueVT = RegVT;
+
+      // Create the appropriate number of virtual registers.
+      SSARegMap *RegMap = MF.getSSARegMap();
+      for (; NumRegs; --NumRegs)
+        Regs.push_back(RegMap->createVirtualRegister(PhysReg.second));
+      
+      OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+      return;
+    }
+    
+    // Otherwise, we can't allocate it.  Let the code below figure out how to
+    // maintain these constraints.
+    RegClassRegs.assign(PhysReg.second->begin(), PhysReg.second->end());
+    
+  } else {
+    // This is a reference to a register class that doesn't directly correspond
+    // to an LLVM register class.  Allocate NumRegs consecutive, available,
+    // registers from the class.
+    RegClassRegs = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                                         OpInfo.ConstraintVT);
+  }
+  
+  const MRegisterInfo *MRI = DAG.getTarget().getRegisterInfo();
+  unsigned NumAllocated = 0;
+  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+    unsigned Reg = RegClassRegs[i];
+    // See if this register is available.
+    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
+        (isInReg  && InputRegs.count(Reg))) {    // Already used.
+      // Make sure we find consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+    
+    // Check to see if this register is allocatable (i.e. don't give out the
+    // stack pointer).
+    if (RC == 0) {
+      RC = isAllocatableRegister(Reg, MF, TLI, MRI);
+      if (!RC) {        // Couldn't allocate this register.
+        // Reset NumAllocated to make sure we return consecutive registers.
+        NumAllocated = 0;
+        continue;
+      }
+    }
+    
+    // Okay, this register is good, we can use it.
+    ++NumAllocated;
+
+    // If we allocated enough consecutive registers, succeed.
+    if (NumAllocated == NumRegs) {
+      unsigned RegStart = (i-NumAllocated)+1;
+      unsigned RegEnd   = i+1;
+      // Mark all of the allocated registers used.
+      for (unsigned i = RegStart; i != RegEnd; ++i)
+        Regs.push_back(RegClassRegs[i]);
+      
+      OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), 
+                                         OpInfo.ConstraintVT);
+      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+      return;
+    }
+  }
+  
+  // Otherwise, we couldn't allocate enough registers for this.
+  return;
+}
+
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGLowering::visitInlineAsm(CallInst &I) {
+  InlineAsm *IA = cast<InlineAsm>(I.getOperand(0));
+
+  /// ConstraintOperands - Information about all of the constraints.
+  std::vector<AsmOperandInfo> ConstraintOperands;
+  
+  SDOperand Chain = getRoot();
+  SDOperand Flag;
+  
+  std::set<unsigned> OutputRegs, InputRegs;
+
+  // Do a prepass over the constraints, canonicalizing them, and building up the
+  // ConstraintOperands list.
+  std::vector<InlineAsm::ConstraintInfo>
+    ConstraintInfos = IA->ParseConstraints();
+
+  // SawEarlyClobber - Keep track of whether we saw an earlyclobber output
+  // constraint.  If so, we can't let the register allocator allocate any input
+  // registers, because it will not know to avoid the earlyclobbered output reg.
+  bool SawEarlyClobber = false;
+  
+  unsigned OpNo = 1;   // The next CallInst operand to use (operand 0 is the asm).
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+    AsmOperandInfo &OpInfo = ConstraintOperands.back();
+    
+    MVT::ValueType OpVT = MVT::Other;
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      if (!OpInfo.isIndirect) {
+        // The return value of the call is this value.  As such, there is no
+        // corresponding argument.
+        assert(I.getType() != Type::VoidTy && "Bad inline asm!");
+        OpVT = TLI.getValueType(I.getType());
+      } else {
+        OpInfo.CallOperandVal = I.getOperand(OpNo++);
+      }
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = I.getOperand(OpNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // If this is an input or an indirect output, process the call argument.
+    if (OpInfo.CallOperandVal) {
+      OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+      const Type *OpTy = OpInfo.CallOperandVal->getType();
+      // If this is an indirect operand, the operand is a pointer to the
+      // accessed type.
+      if (OpInfo.isIndirect)
+        OpTy = cast<PointerType>(OpTy)->getElementType();
+      
+      // If OpTy is not a first-class value, it may be a struct/union that we
+      // can tile with integers.
+      if (!OpTy->isFirstClassType() && OpTy->isSized()) {
+        unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+        switch (BitSize) {
+        default: break;
+        case 1:
+        case 8:
+        case 16:
+        case 32:
+        case 64:
+          OpTy = IntegerType::get(BitSize);
+          break;
+        }
+      }
+      
+      OpVT = TLI.getValueType(OpTy, true);
+    }
+    
+    OpInfo.ConstraintVT = OpVT;
+    
+    // Compute the constraint code and ConstraintType to use.
+    OpInfo.ComputeConstraintToUse(TLI);
+
+    // Keep track of whether we see an earlyclobber.
+    SawEarlyClobber |= OpInfo.isEarlyClobber;
+    
+    // If this is a memory input, and if the operand is not indirect, do what
+    // is needed to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert(OpInfo.Type == InlineAsm::isInput &&
+             "Can only indirectify direct input operands!");
+      
+      // Memory operands really want the address of the value.  If we don't have
+      // an indirect input, put it in the constpool if we can, otherwise spill
+      // it to a stack slot.
+      
+      // If the operand is a float, integer, or vector constant, spill to a
+      // constant pool entry to get its address.
+      Value *OpVal = OpInfo.CallOperandVal;
+      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+          isa<ConstantVector>(OpVal)) {
+        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+                                                 TLI.getPointerTy());
+      } else {
+        // Otherwise, create a stack slot and emit a store to it before the
+        // asm.
+        const Type *Ty = OpVal->getType();
+        uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+        SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Chain = DAG.getStore(Chain, OpInfo.CallOperand, StackSlot, NULL, 0);
+        OpInfo.CallOperand = StackSlot;
+      }
+     
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = 0;
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+    
+    // If this constraint is for a specific register, allocate it before
+    // anything else.
+    if (OpInfo.ConstraintType == TargetLowering::C_Register)
+      GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs);
+  }
+  ConstraintInfos.clear();
+  
+  
+  // Second pass - Loop over all of the operands, assigning virtual or physregs
+  // to registerclass operands.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    AsmOperandInfo &OpInfo = ConstraintOperands[i];
+    
+    // C_Register operands have already been allocated, Other/Memory don't need
+    // to be.
+    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+      GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs);
+  }    
+  
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDOperand> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDOperand());  // reserve space for input chain
+  AsmNodeOperands.push_back(
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+  
+  
+  // Loop over all of the inputs, copying the operand values into the
+  // appropriate registers and processing the output regs.
+  RegsForValue RetValRegs;
+  
+  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+  
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    AsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput: {
+      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+          OpInfo.ConstraintType != TargetLowering::C_Register) {
+        // Memory output, or 'other' output (e.g. 'X' constraint).
+        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+        // Add information to the INLINEASM node to know about this output.
+        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(OpInfo.CallOperand);
+        break;
+      }
+
+      // Otherwise, this is a register or register class output.
+
+      // Copy the output from the appropriate register.  Find a register that
+      // we can use.
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        cerr << "Couldn't allocate output reg for contraint '"
+             << OpInfo.ConstraintCode << "'!\n";
+        exit(1);
+      }
+
+      if (!OpInfo.isIndirect) {
+        // This is the result value of the call.
+        assert(RetValRegs.Regs.empty() &&
+               "Cannot have multiple output constraints yet!");
+        assert(I.getType() != Type::VoidTy && "Bad inline asm!");
+        RetValRegs = OpInfo.AssignedRegs;
+      } else {
+        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+                                                      OpInfo.CallOperandVal));
+      }
+      
+      // Add information to the INLINEASM node to know that this register is
+      // set.
+      OpInfo.AssignedRegs.AddInlineAsmOperands(2 /*REGDEF*/, DAG,
+                                               AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isInput: {
+      SDOperand InOperandVal = OpInfo.CallOperand;
+      
+      if (isdigit(OpInfo.ConstraintCode[0])) {    // Matching constraint?
+        // If this is required to match an output register we have already set,
+        // just use its register.
+        unsigned OperandNo = atoi(OpInfo.ConstraintCode.c_str());
+        
+        // Scan until we find the already-emitted definition of this operand.
+        // When we find it, create a RegsForValue operand.
+        unsigned CurOp = 2;  // The first operand after the chain and asm string.
+        for (; OperandNo; --OperandNo) {
+          // Advance to the next operand.
+          unsigned NumOps = 
+            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
+          assert(((NumOps & 7) == 2 /*REGDEF*/ ||
+                  (NumOps & 7) == 4 /*MEM*/) &&
+                 "Skipped past definitions?");
+          CurOp += (NumOps>>3)+1;
+        }
+
+        unsigned NumOps = 
+          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
+        if ((NumOps & 7) == 2 /*REGDEF*/) {
+          // Add NumOps>>3 registers to MatchedRegs.
+          RegsForValue MatchedRegs;
+          MatchedRegs.ValueVT = InOperandVal.getValueType();
+          MatchedRegs.RegVT   = AsmNodeOperands[CurOp+1].getValueType();
+          for (unsigned i = 0, e = NumOps>>3; i != e; ++i) {
+            unsigned Reg =
+              cast<RegisterSDNode>(AsmNodeOperands[++CurOp])->getReg();
+            MatchedRegs.Regs.push_back(Reg);
+          }
+        
+          // Use the produced MatchedRegs object to copy the input value into
+          // the matched registers, and record them on the INLINEASM node.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, Chain, &Flag);
+          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, DAG, AsmNodeOperands);
+          break;
+        } else {
+          assert((NumOps & 7) == 4/*MEM*/ && "Unknown matching constraint!");
+          assert(0 && "matching constraints for memory operands unimp");
+        }
+      }
+      
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+        assert(!OpInfo.isIndirect && 
+               "Don't know how to handle indirect other inputs yet!");
+        
+        InOperandVal = TLI.isOperandValidForConstraint(InOperandVal,
+                                                       OpInfo.ConstraintCode[0],
+                                                       DAG);
+        if (!InOperandVal.Val) {
+          cerr << "Invalid operand for inline asm constraint '"
+               << OpInfo.ConstraintCode << "'!\n";
+          exit(1);
+        }
+        
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 3 /*IMM*/ | (1 << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+               "Memory operands expect pointer values");
+               
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      }
+        
+      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+              OpInfo.ConstraintType == TargetLowering::C_Register) &&
+             "Unknown constraint type!");
+      assert(!OpInfo.isIndirect && 
+             "Don't know how to handle indirect register inputs yet!");
+
+      // Copy the input into the appropriate registers.
+      assert(!OpInfo.AssignedRegs.Regs.empty() &&
+             "Couldn't allocate input reg!");
+
+      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, Chain, &Flag);
+      
+      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, DAG,
+                                               AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isClobber: {
+      // Add the clobbered value to the operand list, so that the register
+      // allocator is aware that the physreg got clobbered.
+      if (!OpInfo.AssignedRegs.Regs.empty())
+        OpInfo.AssignedRegs.AddInlineAsmOperands(2/*REGDEF*/, DAG,
+                                                 AsmNodeOperands);
+      break;
+    }
+    }
+  }
+  
+  // Finish up input operands.
+  AsmNodeOperands[0] = Chain;
+  if (Flag.Val) AsmNodeOperands.push_back(Flag);
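+  // Sketch of the operand list built above: for an asm like
+  //   call i32 asm "add $0, $1", "=r,r"(i32 %x)
+  // AsmNodeOperands is roughly [chain, asm string, REGDEF word + output reg,
+  // REGUSE word + input reg], with an optional trailing flag.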
+  
+  Chain = DAG.getNode(ISD::INLINEASM, 
+                      DAG.getNodeValueTypes(MVT::Other, MVT::Flag), 2,
+                      &AsmNodeOperands[0], AsmNodeOperands.size());
+  Flag = Chain.getValue(1);
+
+  // If this asm returns a register value, copy the result from that register
+  // and set it as the value of the call.
+  if (!RetValRegs.Regs.empty()) {
+    SDOperand Val = RetValRegs.getCopyFromRegs(DAG, Chain, &Flag);
+    
+    // If the result of the inline asm is a vector, it may have the wrong
+    // width or number of elements.  Make sure to convert it to the right type
+    // with bit_convert.
+    if (MVT::isVector(Val.getValueType())) {
+      const VectorType *VTy = cast<VectorType>(I.getType());
+      MVT::ValueType DesiredVT = TLI.getValueType(VTy);
+      
+      Val = DAG.getNode(ISD::BIT_CONVERT, DesiredVT, Val);
+    }
+    
+    setValue(&I, Val);
+  }
+  
+  std::vector<std::pair<SDOperand, Value*> > StoresToEmit;
+  
+  // Process indirect outputs: first, emit all of the flagged copies out of
+  // physregs.
+  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+    Value *Ptr = IndirectStoresToEmit[i].second;
+    SDOperand OutVal = OutRegs.getCopyFromRegs(DAG, Chain, &Flag);
+    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+  }
+  
+  // Emit the non-flagged stores from the physregs.
+  SmallVector<SDOperand, 8> OutChains;
+  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
+    OutChains.push_back(DAG.getStore(Chain, StoresToEmit[i].first,
+                                    getValue(StoresToEmit[i].second),
+                                    StoresToEmit[i].second, 0));
+  if (!OutChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                        &OutChains[0], OutChains.size());
+  DAG.setRoot(Chain);
+}
+
+
+void SelectionDAGLowering::visitMalloc(MallocInst &I) {
+  SDOperand Src = getValue(I.getOperand(0));
+
+  MVT::ValueType IntPtr = TLI.getPointerTy();
+
+  if (IntPtr < Src.getValueType())
+    Src = DAG.getNode(ISD::TRUNCATE, IntPtr, Src);
+  else if (IntPtr > Src.getValueType())
+    Src = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, Src);
+
+  // Scale the source by the type size.
+  uint64_t ElementSize = TD->getTypeSize(I.getType()->getElementType());
+  Src = DAG.getNode(ISD::MUL, Src.getValueType(),
+                    Src, getIntPtrConstant(ElementSize));
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Src;
+  Entry.Ty = TLI.getTargetData()->getIntPtrType();
+  Args.push_back(Entry);
+
+  std::pair<SDOperand,SDOperand> Result =
+    TLI.LowerCallTo(getRoot(), I.getType(), false, false, CallingConv::C, true,
+                    DAG.getExternalSymbol("malloc", IntPtr),
+                    Args, DAG);
+  setValue(&I, Result.first);  // Pointers always fit in registers
+  DAG.setRoot(Result.second);
+}
+
+void SelectionDAGLowering::visitFree(FreeInst &I) {
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = getValue(I.getOperand(0));
+  Entry.Ty = TLI.getTargetData()->getIntPtrType();
+  Args.push_back(Entry);
+  MVT::ValueType IntPtr = TLI.getPointerTy();
+  std::pair<SDOperand,SDOperand> Result =
+    TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, CallingConv::C, true,
+                    DAG.getExternalSymbol("free", IntPtr), Args, DAG);
+  DAG.setRoot(Result.second);
+}
+
+// InsertAtEndOfBasicBlock - This method should be implemented by targets that
+// mark instructions with the 'usesCustomDAGSchedInserter' flag.  These
+// instructions are special in various ways, which require special support to
+// insert.  The specified MachineInstr is created but not inserted into any
+// basic blocks, and the scheduler passes ownership of it to this method.
+MachineBasicBlock *TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+                                                       MachineBasicBlock *MBB) {
+  cerr << "If a target marks an instruction with "
+       << "'usesCustomDAGSchedInserter', it must implement "
+       << "TargetLowering::InsertAtEndOfBasicBlock!\n";
+  abort();
+  return 0;  
+}
+
+void SelectionDAGLowering::visitVAStart(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VASTART, MVT::Other, getRoot(), 
+                          getValue(I.getOperand(1)), 
+                          DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
+  SDOperand V = DAG.getVAArg(TLI.getValueType(I.getType()), getRoot(),
+                             getValue(I.getOperand(0)),
+                             DAG.getSrcValue(I.getOperand(0)));
+  setValue(&I, V);
+  DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGLowering::visitVAEnd(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VAEND, MVT::Other, getRoot(),
+                          getValue(I.getOperand(1)), 
+                          DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVACopy(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VACOPY, MVT::Other, getRoot(), 
+                          getValue(I.getOperand(1)), 
+                          getValue(I.getOperand(2)),
+                          DAG.getSrcValue(I.getOperand(1)),
+                          DAG.getSrcValue(I.getOperand(2))));
+}
+
+/// TargetLowering::LowerArguments - This is the default LowerArguments
+/// implementation, which just inserts a FORMAL_ARGUMENTS node.  FIXME: When all
+/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be 
+/// integrated into SDISel.
+std::vector<SDOperand> 
+TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
+  const FunctionType *FTy = F.getFunctionType();
+  const ParamAttrsList *Attrs = FTy->getParamAttrs();
+  // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node.
+  std::vector<SDOperand> Ops;
+  Ops.push_back(DAG.getRoot());
+  Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy()));
+  Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy()));
+
+  // Add one result value for each formal argument.
+  std::vector<MVT::ValueType> RetVals;
+  unsigned j = 1;
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+       I != E; ++I, ++j) {
+    MVT::ValueType VT = getValueType(I->getType());
+    unsigned Flags = ISD::ParamFlags::NoFlagSet;
+    unsigned OriginalAlignment =
+      getTargetData()->getABITypeAlignment(I->getType());
+
+    // FIXME: Distinguish a formal with no [sz]ext attribute from one that
+    // is zero extended!
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::ZExt))
+      Flags &= ~(ISD::ParamFlags::SExt);
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::SExt))
+      Flags |= ISD::ParamFlags::SExt;
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::InReg))
+      Flags |= ISD::ParamFlags::InReg;
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::StructRet))
+      Flags |= ISD::ParamFlags::StructReturn;
+    if (Attrs && Attrs->paramHasAttr(j, ParamAttr::ByVal))
+      Flags |= ISD::ParamFlags::ByVal;
+    Flags |= (OriginalAlignment << ISD::ParamFlags::OrigAlignmentOffs);
+    
+    switch (getTypeAction(VT)) {
+    default: assert(0 && "Unknown type action!");
+    case Legal: 
+      RetVals.push_back(VT);
+      Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      break;
+    case Promote:
+      RetVals.push_back(getTypeToTransformTo(VT));
+      Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      break;
+    case Expand: {
+      // If this is an illegal type, it needs to be broken up to fit into 
+      // registers.
+      MVT::ValueType RegisterVT = getRegisterType(VT);
+      unsigned NumRegs = getNumRegisters(VT);
+      for (unsigned i = 0; i != NumRegs; ++i) {
+        RetVals.push_back(RegisterVT);
+        // If this isn't the first piece, the alignment must be 1.
+        if (i > 0)
+          Flags = (Flags & (~ISD::ParamFlags::OrigAlignment)) |
+            (1 << ISD::ParamFlags::OrigAlignmentOffs);
+        Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      }
+      break;
+    }
+    }
+  }
+
+  RetVals.push_back(MVT::Other);
+  
+  // Create the node.
+  SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS,
+                               DAG.getNodeValueTypes(RetVals), RetVals.size(),
+                               &Ops[0], Ops.size()).Val;
+  unsigned NumArgRegs = Result->getNumValues() - 1;
+  DAG.setRoot(SDOperand(Result, NumArgRegs));
+
+  // Set up the return result vector.
+  Ops.clear();
+  unsigned i = 0;
+  unsigned Idx = 1;
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; 
+      ++I, ++Idx) {
+    MVT::ValueType VT = getValueType(I->getType());
+    
+    switch (getTypeAction(VT)) {
+    default: assert(0 && "Unknown type action!");
+    case Legal: 
+      Ops.push_back(SDOperand(Result, i++));
+      break;
+    case Promote: {
+      SDOperand Op(Result, i++);
+      if (MVT::isInteger(VT)) {
+        if (Attrs && Attrs->paramHasAttr(Idx, ParamAttr::SExt))
+          Op = DAG.getNode(ISD::AssertSext, Op.getValueType(), Op,
+                           DAG.getValueType(VT));
+        else if (Attrs && Attrs->paramHasAttr(Idx, ParamAttr::ZExt))
+          Op = DAG.getNode(ISD::AssertZext, Op.getValueType(), Op,
+                           DAG.getValueType(VT));
+        Op = DAG.getNode(ISD::TRUNCATE, VT, Op);
+      } else {
+        assert(MVT::isFloatingPoint(VT) && "Not int or FP?");
+        Op = DAG.getNode(ISD::FP_ROUND, VT, Op);
+      }
+      Ops.push_back(Op);
+      break;
+    }
+    case Expand: {
+      MVT::ValueType PartVT = getRegisterType(VT);
+      unsigned NumParts = getNumRegisters(VT);
+      SmallVector<SDOperand, 4> Parts(NumParts);
+      for (unsigned j = 0; j != NumParts; ++j)
+        Parts[j] = SDOperand(Result, i++);
+      Ops.push_back(getCopyFromParts(DAG, &Parts[0], NumParts, PartVT, VT));
+      break;
+    }
+    }
+  }
+  assert(i == NumArgRegs && "Argument register count mismatch!");
+  return Ops;
+}
+
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just inserts an ISD::CALL node, which is later custom
+/// lowered by the target to something concrete.  FIXME: When all targets are
+/// migrated to using ISD::CALL, this hook should be integrated into SDISel.
+std::pair<SDOperand, SDOperand>
+TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, 
+                            bool RetTyIsSigned, bool isVarArg,
+                            unsigned CallingConv, bool isTailCall, 
+                            SDOperand Callee,
+                            ArgListTy &Args, SelectionDAG &DAG) {
+  SmallVector<SDOperand, 32> Ops;
+  Ops.push_back(Chain);   // Op#0 - Chain
+  Ops.push_back(DAG.getConstant(CallingConv, getPointerTy())); // Op#1 - CC
+  Ops.push_back(DAG.getConstant(isVarArg, getPointerTy()));    // Op#2 - VarArg
+  Ops.push_back(DAG.getConstant(isTailCall, getPointerTy()));  // Op#3 - Tail
+  Ops.push_back(Callee);
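+  // Operands 0-4 are fixed (chain, CC, vararg, tailcall, callee); every
+  // argument below then contributes a (value, flags) pair of operands.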
+  
+  // Handle all of the outgoing arguments.
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    MVT::ValueType VT = getValueType(Args[i].Ty);
+    SDOperand Op = Args[i].Node;
+    unsigned Flags = ISD::ParamFlags::NoFlagSet;
+    unsigned OriginalAlignment =
+      getTargetData()->getABITypeAlignment(Args[i].Ty);
+    
+    if (Args[i].isSExt)
+      Flags |= ISD::ParamFlags::SExt;
+    if (Args[i].isZExt)
+      Flags |= ISD::ParamFlags::ZExt;
+    if (Args[i].isInReg)
+      Flags |= ISD::ParamFlags::InReg;
+    if (Args[i].isSRet)
+      Flags |= ISD::ParamFlags::StructReturn;
+    Flags |= OriginalAlignment << ISD::ParamFlags::OrigAlignmentOffs;
+    
+    switch (getTypeAction(VT)) {
+    default: assert(0 && "Unknown type action!");
+    case Legal:
+      Ops.push_back(Op);
+      Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      break;
+    case Promote:
+      if (MVT::isInteger(VT)) {
+        unsigned ExtOp;
+        if (Args[i].isSExt)
+          ExtOp = ISD::SIGN_EXTEND;
+        else if (Args[i].isZExt)
+          ExtOp = ISD::ZERO_EXTEND;
+        else
+          ExtOp = ISD::ANY_EXTEND;
+        Op = DAG.getNode(ExtOp, getTypeToTransformTo(VT), Op);
+      } else {
+        assert(MVT::isFloatingPoint(VT) && "Not int or FP?");
+        Op = DAG.getNode(ISD::FP_EXTEND, getTypeToTransformTo(VT), Op);
+      }
+      Ops.push_back(Op);
+      Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+      break;
+    case Expand: {
+      MVT::ValueType PartVT = getRegisterType(VT);
+      unsigned NumParts = getNumRegisters(VT);
+      SmallVector<SDOperand, 4> Parts(NumParts);
+      getCopyToParts(DAG, Op, &Parts[0], NumParts, PartVT);
+      for (unsigned i = 0; i != NumParts; ++i) {
+        // If this isn't the first piece, the alignment must be 1.
+        unsigned MyFlags = Flags;
+        if (i != 0)
+          MyFlags = (MyFlags & (~ISD::ParamFlags::OrigAlignment)) |
+            (1 << ISD::ParamFlags::OrigAlignmentOffs);
+
+        Ops.push_back(Parts[i]);
+        Ops.push_back(DAG.getConstant(MyFlags, MVT::i32));
+      }
+      break;
+    }
+    }
+  }
+  
+  // Figure out the result value types.
+  MVT::ValueType VT = getValueType(RetTy);
+  MVT::ValueType RegisterVT = getRegisterType(VT);
+  unsigned NumRegs = getNumRegisters(VT);
+  SmallVector<MVT::ValueType, 4> RetTys(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i)
+    RetTys[i] = RegisterVT;
+  
+  RetTys.push_back(MVT::Other);  // Always has a chain.
+  
+  // Create the CALL node.
+  SDOperand Res = DAG.getNode(ISD::CALL,
+                              DAG.getVTList(&RetTys[0], NumRegs + 1),
+                              &Ops[0], Ops.size());
+  SDOperand Chain = Res.getValue(NumRegs);
+
+  // Gather up the call result into a single value.
+  if (RetTy != Type::VoidTy) {
+    ISD::NodeType AssertOp = ISD::AssertSext;
+    if (!RetTyIsSigned)
+      AssertOp = ISD::AssertZext;
+    SmallVector<SDOperand, 4> Results(NumRegs);
+    for (unsigned i = 0; i != NumRegs; ++i)
+      Results[i] = Res.getValue(i);
+    Res = getCopyFromParts(DAG, &Results[0], NumRegs, RegisterVT, VT, AssertOp);
+  }
+
+  return std::make_pair(Res, Chain);
+}
+
+SDOperand TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+  assert(0 && "LowerOperation not implemented for this target!");
+  abort();
+  return SDOperand();
+}
+
+SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op,
+                                                 SelectionDAG &DAG) {
+  assert(0 && "CustomPromoteOperation not implemented for this target!");
+  abort();
+  return SDOperand();
+}
+
+/// getMemsetValue - Build a value of type VT with the memset value operand
+/// replicated into each of its bytes.
+static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
+                                SelectionDAG &DAG) {
+  MVT::ValueType CurVT = VT;
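+  // e.g. a constant byte value 0xAB with VT = i32 yields the constant
+  // 0xABABABAB; a non-constant value is instead replicated with a chain of
+  // shift-and-or nodes.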
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+    uint64_t Val   = C->getValue() & 255;
+    unsigned Shift = 8;
+    while (CurVT != MVT::i8) {
+      Val = (Val << Shift) | Val;
+      Shift <<= 1;
+      CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+    }
+    return DAG.getConstant(Val, VT);
+  } else {
+    Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
+    unsigned Shift = 8;
+    while (CurVT != MVT::i8) {
+      Value =
+        DAG.getNode(ISD::OR, VT,
+                    DAG.getNode(ISD::SHL, VT, Value,
+                                DAG.getConstant(Shift, MVT::i8)), Value);
+      Shift <<= 1;
+      CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+    }
+
+    return Value;
+  }
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only
+/// used when a memcpy is turned into a memset because the source is a
+/// constant string pointer.
+static SDOperand getMemsetStringVal(MVT::ValueType VT,
+                                    SelectionDAG &DAG, TargetLowering &TLI,
+                                    std::string &Str, unsigned Offset) {
+  uint64_t Val = 0;
+  unsigned MSB = MVT::getSizeInBits(VT) / 8;
+  if (TLI.isLittleEndian())
+    Offset = Offset + MSB - 1;
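+  // On little-endian targets the bytes are read back to front so that the
+  // string's first character ends up in the constant's lowest byte; e.g.
+  // "abcd" at offset 0 with VT = i32 yields 0x64636261.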
+  for (unsigned i = 0; i != MSB; ++i) {
+    Val = (Val << 8) | (unsigned char)Str[Offset];
+    Offset += TLI.isLittleEndian() ? -1 : 1;
+  }
+  return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns the base pointer plus the given constant
+/// offset as a single address node.
+static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
+                                      SelectionDAG &DAG, TargetLowering &TLI) {
+  MVT::ValueType VT = Base.getValueType();
+  return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
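+/// For example, with unaligned accesses disallowed, Size = 11, Align = 4,
+/// and i32 as the largest legal integer type, this produces the sequence
+/// { i32, i32, i16, i8 }: four ops covering 4+4+2+1 = 11 bytes.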
+static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
+                                     unsigned Limit, uint64_t Size,
+                                     unsigned Align, TargetLowering &TLI) {
+  MVT::ValueType VT;
+
+  if (TLI.allowsUnalignedMemoryAccesses()) {
+    VT = MVT::i64;
+  } else {
+    switch (Align & 7) {
+    case 0:
+      VT = MVT::i64;
+      break;
+    case 4:
+      VT = MVT::i32;
+      break;
+    case 2:
+      VT = MVT::i16;
+      break;
+    default:
+      VT = MVT::i8;
+      break;
+    }
+  }
+
+  MVT::ValueType LVT = MVT::i64;
+  while (!TLI.isTypeLegal(LVT))
+    LVT = (MVT::ValueType)((unsigned)LVT - 1);
+  assert(MVT::isInteger(LVT));
+
+  if (VT > LVT)
+    VT = LVT;
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+    while (VTSize > Size) {
+      VT = (MVT::ValueType)((unsigned)VT - 1);
+      VTSize >>= 1;
+    }
+    assert(MVT::isInteger(VT));
+
+    if (++NumMemOps > Limit)
+      return false;
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
+void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
+  SDOperand Op1 = getValue(I.getOperand(1));
+  SDOperand Op2 = getValue(I.getOperand(2));
+  SDOperand Op3 = getValue(I.getOperand(3));
+  SDOperand Op4 = getValue(I.getOperand(4));
+  unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue();
+  if (Align == 0) Align = 1;
+
+  if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) {
+    std::vector<MVT::ValueType> MemOps;
+
+    // Expand memset / memcpy to a series of load / store ops
+    // if the size operand falls below a certain threshold.
+    SmallVector<SDOperand, 8> OutChains;
+    switch (Op) {
+    default: break;  // Do nothing for now.
+    case ISD::MEMSET: {
+      if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
+                                   Size->getValue(), Align, TLI)) {
+        unsigned NumMemOps = MemOps.size();
+        unsigned Offset = 0;
+        for (unsigned i = 0; i < NumMemOps; i++) {
+          MVT::ValueType VT = MemOps[i];
+          unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+          SDOperand Value = getMemsetValue(Op2, VT, DAG);
+          SDOperand Store = DAG.getStore(getRoot(), Value,
+                                    getMemBasePlusOffset(Op1, Offset, DAG, TLI),
+                                         I.getOperand(1), Offset);
+          OutChains.push_back(Store);
+          Offset += VTSize;
+        }
+      }
+      break;
+    }
+    case ISD::MEMCPY: {
+      if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(),
+                                   Size->getValue(), Align, TLI)) {
+        unsigned NumMemOps = MemOps.size();
+        unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0;
+        GlobalAddressSDNode *G = NULL;
+        std::string Str;
+        bool CopyFromStr = false;
+
+        if (Op2.getOpcode() == ISD::GlobalAddress)
+          G = cast<GlobalAddressSDNode>(Op2);
+        else if (Op2.getOpcode() == ISD::ADD &&
+                 Op2.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+                 Op2.getOperand(1).getOpcode() == ISD::Constant) {
+          G = cast<GlobalAddressSDNode>(Op2.getOperand(0));
+          SrcDelta = cast<ConstantSDNode>(Op2.getOperand(1))->getValue();
+        }
+        if (G) {
+          GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+          if (GV && GV->isConstant()) {
+            Str = GV->getStringValue(false);
+            if (!Str.empty()) {
+              CopyFromStr = true;
+              SrcOff += SrcDelta;
+            }
+          }
+        }
+
+        for (unsigned i = 0; i < NumMemOps; i++) {
+          MVT::ValueType VT = MemOps[i];
+          unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+          SDOperand Value, Chain, Store;
+
+          if (CopyFromStr) {
+            Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
+            Chain = getRoot();
+            Store =
+              DAG.getStore(Chain, Value,
+                           getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
+                           I.getOperand(1), DstOff);
+          } else {
+            Value = DAG.getLoad(VT, getRoot(),
+                        getMemBasePlusOffset(Op2, SrcOff, DAG, TLI),
+                        I.getOperand(2), SrcOff);
+            Chain = Value.getValue(1);
+            Store =
+              DAG.getStore(Chain, Value,
+                           getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
+                           I.getOperand(1), DstOff);
+          }
+          OutChains.push_back(Store);
+          SrcOff += VTSize;
+          DstOff += VTSize;
+        }
+      }
+      break;
+    }
+    }
+
+    if (!OutChains.empty()) {
+      DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
+                  &OutChains[0], OutChains.size()));
+      return;
+    }
+  }
+
+  DAG.setRoot(DAG.getNode(Op, MVT::Other, getRoot(), Op1, Op2, Op3, Op4));
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+unsigned SelectionDAGISel::MakeReg(MVT::ValueType VT) {
+  return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AliasAnalysis>();
+  AU.setPreservesAll();
+}
+
+
+
+bool SelectionDAGISel::runOnFunction(Function &Fn) {
+  MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine());
+  RegMap = MF.getSSARegMap();
+  DOUT << "\n\n\n=== " << Fn.getName() << "\n";
+
+  FunctionLoweringInfo FuncInfo(TLI, Fn, MF);
+
+  if (ExceptionHandling)
+    for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+      if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
+        // Mark landing pad.
+        FuncInfo.MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+
+  for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+    SelectBasicBlock(I, MF, FuncInfo);
+
+  // Add function live-ins to entry block live-in set.
+  BasicBlock *EntryBB = &Fn.getEntryBlock();
+  BB = FuncInfo.MBBMap[EntryBB];
+  if (!MF.livein_empty())
+    for (MachineFunction::livein_iterator I = MF.livein_begin(),
+           E = MF.livein_end(); I != E; ++I)
+      BB->addLiveIn(I->first);
+
+#ifndef NDEBUG
+  assert(FuncInfo.CatchInfoFound.size() == FuncInfo.CatchInfoLost.size() &&
+         "Not all catch info was assigned to a landing pad!");
+#endif
+
+  return true;
+}
+
+SDOperand SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, 
+                                                           unsigned Reg) {
+  SDOperand Op = getValue(V);
+  assert((Op.getOpcode() != ISD::CopyFromReg ||
+          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+         "Copy from a reg to the same reg!");
+  
+  MVT::ValueType SrcVT = Op.getValueType();
+  MVT::ValueType RegisterVT = TLI.getRegisterType(SrcVT);
+  unsigned NumRegs = TLI.getNumRegisters(SrcVT);
+  SmallVector<SDOperand, 8> Regs(NumRegs);
+  SmallVector<SDOperand, 8> Chains(NumRegs);
+
+  // Copy the value by legal parts into sequential virtual registers.
+  getCopyToParts(DAG, Op, &Regs[0], NumRegs, RegisterVT);
+  for (unsigned i = 0; i != NumRegs; ++i)
+    Chains[i] = DAG.getCopyToReg(getRoot(), Reg + i, Regs[i]);
+  return DAG.getNode(ISD::TokenFactor, MVT::Other, &Chains[0], NumRegs);
+}
+
+void SelectionDAGISel::
+LowerArguments(BasicBlock *LLVMBB, SelectionDAGLowering &SDL,
+               std::vector<SDOperand> &UnorderedChains) {
+  // If this is the entry block, emit arguments.
+  Function &F = *LLVMBB->getParent();
+  FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
+  SDOperand OldRoot = SDL.DAG.getRoot();
+  std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
+
+  unsigned a = 0;
+  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+       AI != E; ++AI, ++a)
+    if (!AI->use_empty()) {
+      SDL.setValue(AI, Args[a]);
+
+      // If this argument is live outside of the entry block, insert a copy from
+      // wherever we got it to the vreg that other BBs will reference it as.
+      DenseMap<const Value*, unsigned>::iterator VMI=FuncInfo.ValueMap.find(AI);
+      if (VMI != FuncInfo.ValueMap.end()) {
+        SDOperand Copy = SDL.CopyValueToVirtualRegister(AI, VMI->second);
+        UnorderedChains.push_back(Copy);
+      }
+    }
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  // FIXME: this should insert code into the DAG!
+  EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
+}
+
+static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+                          MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+  assert(!FLI.MBBMap[SrcBB]->isLandingPad() &&
+         "Copying catch info out of a landing pad!");
+  for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
+    if (isSelector(I)) {
+      // Apply the catch info to DestBB.
+      addCatchInfo(cast<CallInst>(*I), MMI, FLI.MBBMap[DestBB]);
+#ifndef NDEBUG
+      FLI.CatchInfoFound.insert(I);
+#endif
+    }
+}
+
+void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
+       std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate,
+                                         FunctionLoweringInfo &FuncInfo) {
+  SelectionDAGLowering SDL(DAG, TLI, FuncInfo);
+
+  std::vector<SDOperand> UnorderedChains;
+
+  // Lower any arguments needed in this block if this is the entry block.
+  if (LLVMBB == &LLVMBB->getParent()->getEntryBlock())
+    LowerArguments(LLVMBB, SDL, UnorderedChains);
+
+  BB = FuncInfo.MBBMap[LLVMBB];
+  SDL.setCurrentBasicBlock(BB);
+
+  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+  if (ExceptionHandling && MMI && BB->isLandingPad()) {
+    // Add a label to mark the beginning of the landing pad.  Deletion of the
+    // landing pad can thus be detected via the MachineModuleInfo.
+    unsigned LabelID = MMI->addLandingPad(BB);
+    DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, DAG.getEntryNode(),
+                            DAG.getConstant(LabelID, MVT::i32)));
+
+    // Mark exception register as live in.
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    if (Reg) BB->addLiveIn(Reg);
+
+    // Mark exception selector register as live in.
+    Reg = TLI.getExceptionSelectorRegister();
+    if (Reg) BB->addLiveIn(Reg);
+
+    // FIXME: Hack around an exception handling flaw (PR1508): the personality
+    // function and list of typeids logically belong to the invoke (or, if you
+    // like, the basic block containing the invoke), and need to be associated
+    // with it in the dwarf exception handling tables.  Currently however the
+    // information is provided by an intrinsic (eh.selector) that can be moved
+    // to unexpected places by the optimizers: if the unwind edge is critical,
+    // then breaking it can result in the intrinsics being in the successor of
+    // the landing pad, not the landing pad itself.  This results in exceptions
+    // not being caught because no typeids are associated with the invoke.
+    // This may not be the only way things can go wrong, but it is the only
+    // one we try to work around for the moment.
+    BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+    if (Br && Br->isUnconditional()) { // Critical edge?
+      BasicBlock::iterator I, E;
+      for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+        if (isSelector(I))
+          break;
+
+      if (I == E)
+        // No catch info found - try to extract some from the successor.
+        copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, FuncInfo);
+    }
+  }
+
+  // Lower all of the non-terminator instructions.
+  for (BasicBlock::iterator I = LLVMBB->begin(), E = --LLVMBB->end();
+       I != E; ++I)
+    SDL.visit(*I);
+
+  // Ensure that all instructions which are used outside of their defining
+  // blocks are available as virtual registers.  Invoke is handled elsewhere.
+  for (BasicBlock::iterator I = LLVMBB->begin(), E = LLVMBB->end(); I != E;++I)
+    if (!I->use_empty() && !isa<PHINode>(I) && !isa<InvokeInst>(I)) {
+      DenseMap<const Value*, unsigned>::iterator VMI =
+        FuncInfo.ValueMap.find(I);
+      if (VMI != FuncInfo.ValueMap.end())
+        UnorderedChains.push_back(
+                                SDL.CopyValueToVirtualRegister(I, VMI->second));
+    }
+
+  // Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
+  // ensure constants are generated when needed.  Remember the virtual registers
+  // that need to be added to the Machine PHI nodes as input.  We cannot just
+  // directly add them, because expansion might result in multiple MBB's for one
+  // BB.  As such, the start of the BB might correspond to a different MBB than
+  // the end.
+  //
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  // Emit constants only once even if used by multiple PHI nodes.
+  std::map<Constant*, unsigned> ConstantsOut;
+  
+  // Vector bool would be better, but vector<bool> is really slow.
+  std::vector<unsigned char> SuccsHandled;
+  if (TI->getNumSuccessors())
+    SuccsHandled.resize(BB->getParent()->getNumBlockIDs());
+    
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+    
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    unsigned SuccMBBNo = SuccMBB->getNumber();
+    if (SuccsHandled[SuccMBBNo]) continue;
+    SuccsHandled[SuccMBBNo] = true;
+    
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+      
+      unsigned Reg;
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+      
+      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo.CreateRegForValue(C);
+          UnorderedChains.push_back(
+                           SDL.CopyValueToVirtualRegister(C, RegOut));
+        }
+        Reg = RegOut;
+      } else {
+        Reg = FuncInfo.ValueMap[PHIOp];
+        if (Reg == 0) {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo.CreateRegForValue(PHIOp);
+          UnorderedChains.push_back(
+                           SDL.CopyValueToVirtualRegister(PHIOp, Reg));
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI
+      // node as the input for this MBB.
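+      // For example, an i64 PHI on a 32-bit target expands to two machine
+      // PHI nodes; we record one entry per part, for Reg and Reg+1.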
+      MVT::ValueType VT = TLI.getValueType(PN->getType());
+      unsigned NumRegisters = TLI.getNumRegisters(VT);
+      for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+        PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+    }
+  }
+  ConstantsOut.clear();
+
+  // Turn all of the unordered chains into one factored node.
+  if (!UnorderedChains.empty()) {
+    SDOperand Root = SDL.getRoot();
+    if (Root.getOpcode() != ISD::EntryToken) {
+      unsigned i = 0, e = UnorderedChains.size();
+      for (; i != e; ++i) {
+        assert(UnorderedChains[i].Val->getNumOperands() > 1);
+        if (UnorderedChains[i].Val->getOperand(0) == Root)
+          break;  // Don't add the root if we already indirectly depend on it.
+      }
+        
+      if (i == e)
+        UnorderedChains.push_back(Root);
+    }
+    DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
+                            &UnorderedChains[0], UnorderedChains.size()));
+  }
+
+  // Lower the terminator after the copies are emitted.
+  SDL.visit(*LLVMBB->getTerminator());
+
+  // Copy over any CaseBlock records that may now exist due to SwitchInst
+  // lowering, as well as any jump table information.
+  SwitchCases.clear();
+  SwitchCases = SDL.SwitchCases;
+  JTCases.clear();
+  JTCases = SDL.JTCases;
+  BitTestCases.clear();
+  BitTestCases = SDL.BitTestCases;
+    
+  // Make sure the root of the DAG is up-to-date.
+  DAG.setRoot(SDL.getRoot());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) {
+  // Get alias analysis for load/store combining.
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // Run the DAG combiner in pre-legalize mode.
+  DAG.Combine(false, AA);
+  
+  DOUT << "Lowered selection DAG:\n";
+  DEBUG(DAG.dump());
+  
+  // Second step, hack on the DAG until it only uses operations and types that
+  // the target supports.
+  DAG.Legalize();
+  
+  DOUT << "Legalized selection DAG:\n";
+  DEBUG(DAG.dump());
+  
+  // Run the DAG combiner in post-legalize mode.
+  DAG.Combine(true, AA);
+  
+  if (ViewISelDAGs) DAG.viewGraph();
+
+  // Third, instruction select all of the operations to machine code, adding the
+  // code to the MachineBasicBlock.
+  InstructionSelectBasicBlock(DAG);
+  
+  DOUT << "Selected machine code:\n";
+  DEBUG(BB->dump());
+}  
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF,
+                                        FunctionLoweringInfo &FuncInfo) {
+  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+  {
+    SelectionDAG DAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+    CurDAG = &DAG;
+  
+    // First step, lower LLVM code to some DAG.  This DAG may use operations and
+    // types that are not supported by the target.
+    BuildSelectionDAG(DAG, LLVMBB, PHINodesToUpdate, FuncInfo);
+
+    // Second step, emit the lowered DAG as machine code.
+    CodeGenAndEmitDAG(DAG);
+  }
+
+  DOUT << "Total amount of phi nodes to update: "
+       << PHINodesToUpdate.size() << "\n";
+  DEBUG(for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i)
+          DOUT << "Node " << i << " : (" << PHINodesToUpdate[i].first
+               << ", " << PHINodesToUpdate[i].second << ")\n";);
+  
+  // Next, now that we know what the last MBB the LLVM BB expanded is, update
+  // PHI nodes in successors.
+  if (SwitchCases.empty() && JTCases.empty() && BitTestCases.empty()) {
+    for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) {
+      MachineInstr *PHI = PHINodesToUpdate[i].first;
+      assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+             "This is not a machine PHI node that we are updating!");
+      PHI->addRegOperand(PHINodesToUpdate[i].second, false);
+      PHI->addMachineBasicBlockOperand(BB);
+    }
+    return;
+  }
+
+  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) {
+    // Lower header first, if it wasn't already lowered
+    if (!BitTestCases[i].Emitted) {
+      SelectionDAG HSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+      CurDAG = &HSDAG;
+      SelectionDAGLowering HSDL(HSDAG, TLI, FuncInfo);    
+      // Set the current basic block to the mbb we wish to insert the code into
+      BB = BitTestCases[i].Parent;
+      HSDL.setCurrentBasicBlock(BB);
+      // Emit the code
+      HSDL.visitBitTestHeader(BitTestCases[i]);
+      HSDAG.setRoot(HSDL.getRoot());
+      CodeGenAndEmitDAG(HSDAG);
+    }    
+
+    for (unsigned j = 0, ej = BitTestCases[i].Cases.size(); j != ej; ++j) {
+      SelectionDAG BSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+      CurDAG = &BSDAG;
+      SelectionDAGLowering BSDL(BSDAG, TLI, FuncInfo);
+      // Set the current basic block to the mbb we wish to insert the code into
+      BB = BitTestCases[i].Cases[j].ThisBB;
+      BSDL.setCurrentBasicBlock(BB);
+      // Emit the code
+      if (j+1 != ej)
+        BSDL.visitBitTestCase(BitTestCases[i].Cases[j+1].ThisBB,
+                              BitTestCases[i].Reg,
+                              BitTestCases[i].Cases[j]);
+      else
+        BSDL.visitBitTestCase(BitTestCases[i].Default,
+                              BitTestCases[i].Reg,
+                              BitTestCases[i].Cases[j]);
+
+      BSDAG.setRoot(BSDL.getRoot());
+      CodeGenAndEmitDAG(BSDAG);
+    }
+
+    // Update PHI Nodes
+    for (unsigned pi = 0, pe = PHINodesToUpdate.size(); pi != pe; ++pi) {
+      MachineInstr *PHI = PHINodesToUpdate[pi].first;
+      MachineBasicBlock *PHIBB = PHI->getParent();
+      assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+             "This is not a machine PHI node that we are updating!");
+      // This is "default" BB. We have two jumps to it. From "header" BB and
+      // from last "case" BB.
+      if (PHIBB == BitTestCases[i].Default) {
+        PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+        PHI->addMachineBasicBlockOperand(BitTestCases[i].Parent);
+        PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+        PHI->addMachineBasicBlockOperand(BitTestCases[i].Cases.back().ThisBB);
+      }
+      // One of "cases" BB.
+      for (unsigned j = 0, ej = BitTestCases[i].Cases.size(); j != ej; ++j) {
+        MachineBasicBlock* cBB = BitTestCases[i].Cases[j].ThisBB;
+        if (cBB->succ_end() !=
+            std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) {
+          PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+          PHI->addMachineBasicBlockOperand(cBB);
+        }
+      }
+    }
+  }
+
+  // If the JumpTable record is filled in, then we need to emit a jump table.
+  // Updating the PHI nodes is tricky in this case, since we need to determine
+  // whether the PHI is a successor of the range check MBB or the jump
+  // table MBB.
+  for (unsigned i = 0, e = JTCases.size(); i != e; ++i) {
+    // Lower header first, if it wasn't already lowered
+    if (!JTCases[i].first.Emitted) {
+      SelectionDAG HSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+      CurDAG = &HSDAG;
+      SelectionDAGLowering HSDL(HSDAG, TLI, FuncInfo);    
+      // Set the current basic block to the mbb we wish to insert the code into
+      BB = JTCases[i].first.HeaderBB;
+      HSDL.setCurrentBasicBlock(BB);
+      // Emit the code
+      HSDL.visitJumpTableHeader(JTCases[i].second, JTCases[i].first);
+      HSDAG.setRoot(HSDL.getRoot());
+      CodeGenAndEmitDAG(HSDAG);
+    }
+    
+    SelectionDAG JSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+    CurDAG = &JSDAG;
+    SelectionDAGLowering JSDL(JSDAG, TLI, FuncInfo);
+    // Set the current basic block to the mbb we wish to insert the code into
+    BB = JTCases[i].second.MBB;
+    JSDL.setCurrentBasicBlock(BB);
+    // Emit the code
+    JSDL.visitJumpTable(JTCases[i].second);
+    JSDAG.setRoot(JSDL.getRoot());
+    CodeGenAndEmitDAG(JSDAG);
+    
+    // Update PHI Nodes
+    for (unsigned pi = 0, pe = PHINodesToUpdate.size(); pi != pe; ++pi) {
+      MachineInstr *PHI = PHINodesToUpdate[pi].first;
+      MachineBasicBlock *PHIBB = PHI->getParent();
+      assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+             "This is not a machine PHI node that we are updating!");
+      // "default" BB. We can go there only from header BB.
+      if (PHIBB == JTCases[i].second.Default) {
+        PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+        PHI->addMachineBasicBlockOperand(JTCases[i].first.HeaderBB);
+      }
+      // JT BB. Just iterate over successors here
+      if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
+        PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+        PHI->addMachineBasicBlockOperand(BB);
+      }
+    }
+  }
+  
+  // If the switch block involved a branch to one of the actual successors, we
+  // need to update PHI nodes in that block.
+  for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) {
+    MachineInstr *PHI = PHINodesToUpdate[i].first;
+    assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+           "This is not a machine PHI node that we are updating!");
+    if (BB->isSuccessor(PHI->getParent())) {
+      PHI->addRegOperand(PHINodesToUpdate[i].second, false);
+      PHI->addMachineBasicBlockOperand(BB);
+    }
+  }
+  
+  // If we generated any switch lowering information, build and codegen any
+  // additional DAGs necessary.
+  for (unsigned i = 0, e = SwitchCases.size(); i != e; ++i) {
+    SelectionDAG SDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+    CurDAG = &SDAG;
+    SelectionDAGLowering SDL(SDAG, TLI, FuncInfo);
+    
+    // Set the current basic block to the mbb we wish to insert the code into
+    BB = SwitchCases[i].ThisBB;
+    SDL.setCurrentBasicBlock(BB);
+    
+    // Emit the code
+    SDL.visitSwitchCase(SwitchCases[i]);
+    SDAG.setRoot(SDL.getRoot());
+    CodeGenAndEmitDAG(SDAG);
+    
+    // Handle any PHI nodes in successors of this chunk, as if we were coming
+    // from the original BB before switch expansion.  Note that PHI nodes can
+    // occur multiple times in PHINodesToUpdate.  We have to be very careful to
+    // handle them the right number of times.
+    while ((BB = SwitchCases[i].TrueBB)) {  // Handle LHS and RHS.
+      for (MachineBasicBlock::iterator Phi = BB->begin();
+           Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
+        // This value for this PHI node is recorded in PHINodesToUpdate, get it.
+        for (unsigned pn = 0; ; ++pn) {
+          assert(pn != PHINodesToUpdate.size() && "Didn't find PHI entry!");
+          if (PHINodesToUpdate[pn].first == Phi) {
+            Phi->addRegOperand(PHINodesToUpdate[pn].second, false);
+            Phi->addMachineBasicBlockOperand(SwitchCases[i].ThisBB);
+            break;
+          }
+        }
+      }
+      
+      // Don't process RHS if same block as LHS.
+      if (BB == SwitchCases[i].FalseBB)
+        SwitchCases[i].FalseBB = 0;
+      
+      // If we haven't handled the RHS, do so now.  Otherwise, we're done.
+      SwitchCases[i].TrueBB = SwitchCases[i].FalseBB;
+      SwitchCases[i].FalseBB = 0;
+    }
+    assert(SwitchCases[i].TrueBB == 0 && SwitchCases[i].FalseBB == 0);
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+/// ScheduleAndEmitDAG - Pick a safe ordering and emit instructions for each
+/// target node in the graph.
+void SelectionDAGISel::ScheduleAndEmitDAG(SelectionDAG &DAG) {
+  if (ViewSchedDAGs) DAG.viewGraph();
+
+  RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+  
+  if (!Ctor) {
+    Ctor = ISHeuristic;
+    RegisterScheduler::setDefault(Ctor);
+  }
+  
+  ScheduleDAG *SL = Ctor(this, &DAG, BB);
+  BB = SL->Run();
+  delete SL;
+}
+
+
+HazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
+  return new HazardRecognizer();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255).  If
+/// the dag combiner simplified the 255, we still want to match.  RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
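+/// For example, if the pattern wants (and X, 255) but the combiner proved
+/// the top four bits of X are zero and shrank the constant to 15, the match
+/// still succeeds: the missing bits (0xF0) are known zero on the input.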
+bool SelectionDAGISel::CheckAndMask(SDOperand LHS, ConstantSDNode *RHS, 
+                                    int64_t DesiredMaskS) {
+  uint64_t ActualMask = RHS->getValue();
+  uint64_t DesiredMask =
+    DesiredMaskS & MVT::getIntVTBitMask(LHS.getValueType());
+  
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+  
+  // If the actual AND mask permits bits the desired mask disallows, this
+  // doesn't match.
+  if (ActualMask & ~DesiredMask)
+    return false;
+  
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.  Check for known zero input bits.
+  uint64_t NeededMask = DesiredMask & ~ActualMask;
+  if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+    return true;
+  
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255).  If
+/// the dag combiner simplified the 255, we still want to match.  RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
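+/// For example, if the pattern wants (or X, 255) but the combiner shrank the
+/// constant to 0xF0 because the low four bits of X are already known one, the
+/// match still succeeds: the missing bits (0x0F) are known set on the input.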
+bool SelectionDAGISel::CheckOrMask(SDOperand LHS, ConstantSDNode *RHS, 
+                                    int64_t DesiredMaskS) {
+  uint64_t ActualMask = RHS->getValue();
+  uint64_t DesiredMask =
+    DesiredMaskS & MVT::getIntVTBitMask(LHS.getValueType());
+  
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+  
+  // If the actual OR mask permits bits the desired mask disallows, this
+  // doesn't match.
+  if (ActualMask & ~DesiredMask)
+    return false;
+  
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.  Check for known zero input bits.
+  uint64_t NeededMask = DesiredMask & ~ActualMask;
+  
+  uint64_t KnownZero, KnownOne;
+  CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+  
+  // If all the missing bits in the or are already known to be set, match!
+  if ((NeededMask & KnownOne) == NeededMask)
+    return true;
+  
+  // TODO: check to see if missing bits are just not demanded.
+  
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen.  Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDOperand> &Ops, SelectionDAG &DAG) {
+  std::vector<SDOperand> InOps;
+  std::swap(InOps, Ops);
+
+  Ops.push_back(InOps[0]);  // input chain.
+  Ops.push_back(InOps[1]);  // input asm string.
+
+  unsigned i = 2, e = InOps.size();
+  if (InOps[e-1].getValueType() == MVT::Flag)
+    --e;  // Don't process a flag operand if it is here.
+  
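+  // Each operand group begins with a flag word: the low three bits encode
+  // the operand kind (4 means memory) and the remaining bits give the number
+  // of SDOperands that follow in the group.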
+  while (i != e) {
+    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getValue();
+    if ((Flags & 7) != 4 /*MEM*/) {
+      // Just skip over this operand, copying the operands verbatim.
+      Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+(Flags >> 3) + 1);
+      i += (Flags >> 3) + 1;
+    } else {
+      assert((Flags >> 3) == 1 && "Memory operand with multiple values?");
+      // Otherwise, this is a memory operand.  Ask the target to select it.
+      std::vector<SDOperand> SelOps;
+      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps, DAG)) {
+        cerr << "Could not match memory address.  Inline asm failure!\n";
+        exit(1);
+      }
+      
+      // Add this to the output node.
+      MVT::ValueType IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+      Ops.push_back(DAG.getTargetConstant(4/*MEM*/ | (SelOps.size() << 3),
+                                          IntPtrTy));
+      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+      i += 2;
+    }
+  }
+  
+  // Add the flag input back if present.
+  if (e != InOps.size())
+    Ops.push_back(InOps.back());
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 0000000..12b5682
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,245 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+#include <sstream>
+using namespace llvm;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+    static std::string getGraphName(const SelectionDAG *G) {
+      return G->getMachineFunction().getFunction()->getName();
+    }
+
+    static bool renderGraphFromBottomUp() {
+      return true;
+    }
+    
+    static bool hasNodeAddressLabel(const SDNode *Node,
+                                    const SelectionDAG *Graph) {
+      return true;
+    }
+    
+    /// If you want to override the dot attributes printed for a particular
+    /// edge, override this method.
+    template<typename EdgeIter>
+    static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+      SDOperand Op = EI.getNode()->getOperand(EI.getOperand());
+      MVT::ValueType VT = Op.getValueType();
+      if (VT == MVT::Flag)
+        return "color=red,style=bold";
+      else if (VT == MVT::Other)
+        return "color=blue,style=dashed";
+      return "";
+    }
+    
+
+    static std::string getNodeLabel(const SDNode *Node,
+                                    const SelectionDAG *Graph);
+    static std::string getNodeAttributes(const SDNode *N,
+                                         const SelectionDAG *Graph) {
+#ifndef NDEBUG
+      const std::string &Attrs = Graph->getGraphAttrs(N);
+      if (!Attrs.empty()) {
+        if (Attrs.find("shape=") == std::string::npos)
+          return std::string("shape=Mrecord,") + Attrs;
+        else
+          return Attrs;
+      }
+#endif
+      return "shape=Mrecord";
+    }
+
+    static void addCustomGraphFeatures(SelectionDAG *G,
+                                       GraphWriter<SelectionDAG*> &GW) {
+      GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+      if (G->getRoot().Val)
+        GW.emitEdge(0, -1, G->getRoot().Val, -1, "");
+    }
+  };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+                                                        const SelectionDAG *G) {
+  std::string Op = Node->getOperationName(G);
+
+  for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+    if (Node->getValueType(i) == MVT::Other)
+      Op += ":ch";
+    else
+      Op = Op + ":" + MVT::getValueTypeString(Node->getValueType(i));
+    
+  if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) {
+    Op += ": " + utostr(CSDN->getValue());
+  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) {
+    Op += ": " + ftostr(CSDN->getValue());
+  } else if (const GlobalAddressSDNode *GADN =
+             dyn_cast<GlobalAddressSDNode>(Node)) {
+    int offset = GADN->getOffset();
+    Op += ": " + GADN->getGlobal()->getName();
+    // Only show a nonzero offset.
+    if (offset > 0)
+      Op += "+" + itostr(offset);
+    else if (offset < 0)
+      Op += itostr(offset);
+  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) {
+    Op += " " + itostr(FIDN->getIndex());
+  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) {
+    Op += " " + itostr(JTDN->getIndex());
+  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){
+    if (CP->isMachineConstantPoolEntry()) {
+      std::ostringstream SS;
+      CP->getMachineCPVal()->print(SS);
+      Op += "<" + SS.str() + ">";
+    } else {
+      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+        Op += "<" + ftostr(CFP->getValue()) + ">";
+      else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
+        Op += "<" + utostr(CI->getZExtValue()) + ">";
+      else {
+        std::ostringstream SS;
+        WriteAsOperand(SS, CP->getConstVal(), false);
+        Op += "<" + SS.str() + ">";
+      }
+    }
+  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) {
+    Op = "BB: ";
+    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+    if (LBB)
+      Op += LBB->getName();
+    //Op += " " + (const void*)BBDN->getBasicBlock();
+  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) {
+    if (G && R->getReg() != 0 &&
+        MRegisterInfo::isPhysicalRegister(R->getReg())) {
+      Op = Op + " " + G->getTarget().getRegisterInfo()->getName(R->getReg());
+    } else {
+      Op += " #" + utostr(R->getReg());
+    }
+  } else if (const ExternalSymbolSDNode *ES =
+             dyn_cast<ExternalSymbolSDNode>(Node)) {
+    Op += "'" + std::string(ES->getSymbol()) + "'";
+  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) {
+    if (M->getValue())
+      Op += "<" + M->getValue()->getName() + ":" + itostr(M->getOffset()) + ">";
+    else
+      Op += "<null:" + itostr(M->getOffset()) + ">";
+  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) {
+    Op = Op + " VT=" + MVT::getValueTypeString(N->getVT());
+  } else if (const StringSDNode *N = dyn_cast<StringSDNode>(Node)) {
+    Op = Op + "\"" + N->getValue() + "\"";
+  } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) {
+    bool doExt = true;
+    switch (LD->getExtensionType()) {
+    default: doExt = false; break;
+    case ISD::EXTLOAD:
+      Op = Op + "<anyext ";
+      break;
+    case ISD::SEXTLOAD:
+      Op = Op + " <sext ";
+      break;
+    case ISD::ZEXTLOAD:
+      Op = Op + " <zext ";
+      break;
+    }
+    if (doExt)
+      Op = Op + MVT::getValueTypeString(LD->getLoadedVT()) + ">";
+
+    Op += LD->getIndexedModeName(LD->getAddressingMode());
+  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) {
+    if (ST->isTruncatingStore())
+      Op = Op + "<trunc " + MVT::getValueTypeString(ST->getStoredVT()) + ">";
+    Op += ST->getIndexedModeName(ST->getAddressingMode());
+  }
+  
+  return Op;
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName());
+#else
+  cerr << "SelectionDAG::viewGraph is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif  // NDEBUG
+}
+
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+  NodeGraphAttrs.clear();
+#else
+  cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = Attrs;
+#else
+  cerr << "SelectionDAG::setGraphAttrs is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+  std::map<const SDNode *, std::string>::const_iterator I =
+    NodeGraphAttrs.find(N);
+    
+  if (I != NodeGraphAttrs.end())
+    return I->second;
+  else
+    return "";
+#else
+  cerr << "SelectionDAG::getGraphAttrs is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+  return std::string("");
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+  cerr << "SelectionDAG::setGraphColor is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 0000000..1b7b436
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,1753 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
+  Names[RTLIB::SHL_I32] = "__ashlsi3";
+  Names[RTLIB::SHL_I64] = "__ashldi3";
+  Names[RTLIB::SRL_I32] = "__lshrsi3";
+  Names[RTLIB::SRL_I64] = "__lshrdi3";
+  Names[RTLIB::SRA_I32] = "__ashrsi3";
+  Names[RTLIB::SRA_I64] = "__ashrdi3";
+  Names[RTLIB::MUL_I32] = "__mulsi3";
+  Names[RTLIB::MUL_I64] = "__muldi3";
+  Names[RTLIB::SDIV_I32] = "__divsi3";
+  Names[RTLIB::SDIV_I64] = "__divdi3";
+  Names[RTLIB::UDIV_I32] = "__udivsi3";
+  Names[RTLIB::UDIV_I64] = "__udivdi3";
+  Names[RTLIB::SREM_I32] = "__modsi3";
+  Names[RTLIB::SREM_I64] = "__moddi3";
+  Names[RTLIB::UREM_I32] = "__umodsi3";
+  Names[RTLIB::UREM_I64] = "__umoddi3";
+  Names[RTLIB::NEG_I32] = "__negsi2";
+  Names[RTLIB::NEG_I64] = "__negdi2";
+  Names[RTLIB::ADD_F32] = "__addsf3";
+  Names[RTLIB::ADD_F64] = "__adddf3";
+  Names[RTLIB::SUB_F32] = "__subsf3";
+  Names[RTLIB::SUB_F64] = "__subdf3";
+  Names[RTLIB::MUL_F32] = "__mulsf3";
+  Names[RTLIB::MUL_F64] = "__muldf3";
+  Names[RTLIB::DIV_F32] = "__divsf3";
+  Names[RTLIB::DIV_F64] = "__divdf3";
+  Names[RTLIB::REM_F32] = "fmodf";
+  Names[RTLIB::REM_F64] = "fmod";
+  Names[RTLIB::NEG_F32] = "__negsf2";
+  Names[RTLIB::NEG_F64] = "__negdf2";
+  Names[RTLIB::POWI_F32] = "__powisf2";
+  Names[RTLIB::POWI_F64] = "__powidf2";
+  Names[RTLIB::SQRT_F32] = "sqrtf";
+  Names[RTLIB::SQRT_F64] = "sqrt";
+  Names[RTLIB::SIN_F32] = "sinf";
+  Names[RTLIB::SIN_F64] = "sin";
+  Names[RTLIB::COS_F32] = "cosf";
+  Names[RTLIB::COS_F64] = "cos";
+  Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+  Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+  Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+  Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+  Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+  Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+  Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+  Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+  Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+  Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+  Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+  Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+  Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+  Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+  Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+  Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+  Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+  Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+  Names[RTLIB::OEQ_F32] = "__eqsf2";
+  Names[RTLIB::OEQ_F64] = "__eqdf2";
+  Names[RTLIB::UNE_F32] = "__nesf2";
+  Names[RTLIB::UNE_F64] = "__nedf2";
+  Names[RTLIB::OGE_F32] = "__gesf2";
+  Names[RTLIB::OGE_F64] = "__gedf2";
+  Names[RTLIB::OLT_F32] = "__ltsf2";
+  Names[RTLIB::OLT_F64] = "__ltdf2";
+  Names[RTLIB::OLE_F32] = "__lesf2";
+  Names[RTLIB::OLE_F64] = "__ledf2";
+  Names[RTLIB::OGT_F32] = "__gtsf2";
+  Names[RTLIB::OGT_F64] = "__gtdf2";
+  Names[RTLIB::UO_F32] = "__unordsf2";
+  Names[RTLIB::UO_F64] = "__unorddf2";
+  Names[RTLIB::O_F32] = "__unordsf2";
+  Names[RTLIB::O_F64] = "__unorddf2";
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+  // Initialize each element explicitly; memset would only set individual
+  // bytes, so entries would not actually compare equal to SETCC_INVALID.
+  for (unsigned i = 0; i != RTLIB::UNKNOWN_LIBCALL; ++i)
+    CCs[i] = ISD::SETCC_INVALID;
+  CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+  CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+  CCs[RTLIB::UNE_F32] = ISD::SETNE;
+  CCs[RTLIB::UNE_F64] = ISD::SETNE;
+  CCs[RTLIB::OGE_F32] = ISD::SETGE;
+  CCs[RTLIB::OGE_F64] = ISD::SETGE;
+  CCs[RTLIB::OLT_F32] = ISD::SETLT;
+  CCs[RTLIB::OLT_F64] = ISD::SETLT;
+  CCs[RTLIB::OLE_F32] = ISD::SETLE;
+  CCs[RTLIB::OLE_F64] = ISD::SETLE;
+  CCs[RTLIB::OGT_F32] = ISD::SETGT;
+  CCs[RTLIB::OGT_F64] = ISD::SETGT;
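+  // The __unord* libcalls return nonzero iff either operand is NaN, so
+  // "unordered" tests the call result with SETNE and "ordered" with SETEQ.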
+  CCs[RTLIB::UO_F32] = ISD::SETNE;
+  CCs[RTLIB::UO_F64] = ISD::SETNE;
+  CCs[RTLIB::O_F32] = ISD::SETEQ;
+  CCs[RTLIB::O_F64] = ISD::SETEQ;
+}
+
+TargetLowering::TargetLowering(TargetMachine &tm)
+  : TM(tm), TD(TM.getTargetData()) {
+  assert(ISD::BUILTIN_OP_END <= 156 &&
+         "Fixed size array in TargetLowering is not large enough!");
+  // All operations default to being supported.
+  memset(OpActions, 0, sizeof(OpActions));
+  memset(LoadXActions, 0, sizeof(LoadXActions));
+  memset(&StoreXActions, 0, sizeof(StoreXActions));
+  memset(&IndexedModeActions, 0, sizeof(IndexedModeActions));
+
+  // Set all indexed load / store to expand.
+  for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+    for (unsigned IM = (unsigned)ISD::PRE_INC;
+         IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+      setIndexedLoadAction(IM, (MVT::ValueType)VT, Expand);
+      setIndexedStoreAction(IM, (MVT::ValueType)VT, Expand);
+    }
+  }
+
+  IsLittleEndian = TD->isLittleEndian();
+  UsesGlobalOffsetTable = false;
+  ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType());
+  ShiftAmtHandling = Undefined;
+  memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+  memset(TargetDAGCombineArray, 0, 
+         sizeof(TargetDAGCombineArray)/sizeof(TargetDAGCombineArray[0]));
+  maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+  allowUnalignedMemoryAccesses = false;
+  UseUnderscoreSetJmp = false;
+  UseUnderscoreLongJmp = false;
+  SelectIsExpensive = false;
+  IntDivIsCheap = false;
+  Pow2DivIsCheap = false;
+  StackPointerRegisterToSaveRestore = 0;
+  ExceptionPointerRegister = 0;
+  ExceptionSelectorRegister = 0;
+  SchedPreferenceInfo = SchedulingForLatency;
+  JumpBufSize = 0;
+  JumpBufAlignment = 0;
+  IfCvtBlockSizeLimit = 2;
+
+  InitLibcallNames(LibcallRoutineNames);
+  InitCmpLibcallCCs(CmpLibcallCCs);
+}
+
+TargetLowering::~TargetLowering() {}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLowering::computeRegisterProperties() {
+  assert(MVT::LAST_VALUETYPE <= 32 &&
+         "Too many value types for ValueTypeActions to hold!");
+
+  // Everything defaults to needing one register.
+  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+    NumRegistersForVT[i] = 1;
+    RegisterTypeForVT[i] = TransformToType[i] = i;
+  }
+  // ...except isVoid, which doesn't need any registers.
+  NumRegistersForVT[MVT::isVoid] = 0;
+
+  // Find the largest integer register class.
+  unsigned LargestIntReg = MVT::i128;
+  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+  // Every integer value type larger than this largest register takes twice as
+  // many registers to represent as the previous ValueType.
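+  // For example, if i32 is the largest legal integer type, i64 takes two
+  // registers and i128 takes four.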
+  for (MVT::ValueType ExpandedReg = LargestIntReg + 1;
+       MVT::isInteger(ExpandedReg); ++ExpandedReg) {
+    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+    RegisterTypeForVT[ExpandedReg] = LargestIntReg;
+    TransformToType[ExpandedReg] = ExpandedReg - 1;
+    ValueTypeActions.setTypeAction(ExpandedReg, Expand);
+  }
+
+  // Inspect all of the ValueType's smaller than the largest integer
+  // register to see which ones need promotion.
+  MVT::ValueType LegalIntReg = LargestIntReg;
+  for (MVT::ValueType IntReg = LargestIntReg - 1;
+       IntReg >= MVT::i1; --IntReg) {
+    if (isTypeLegal(IntReg)) {
+      LegalIntReg = IntReg;
+    } else {
+      RegisterTypeForVT[IntReg] = TransformToType[IntReg] = LegalIntReg;
+      ValueTypeActions.setTypeAction(IntReg, Promote);
+    }
+  }
+
+  // Decide how to handle f64. If the target does not have native f64 support,
+  // expand it to i64 and we will be generating soft float library calls.
+  if (!isTypeLegal(MVT::f64)) {
+    NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+    RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+    TransformToType[MVT::f64] = MVT::i64;
+    ValueTypeActions.setTypeAction(MVT::f64, Expand);
+  }
+
+  // Decide how to handle f32. If the target does not have native support for
+  // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+  if (!isTypeLegal(MVT::f32)) {
+    if (isTypeLegal(MVT::f64)) {
+      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+      TransformToType[MVT::f32] = MVT::f64;
+      ValueTypeActions.setTypeAction(MVT::f32, Promote);
+    } else {
+      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+      TransformToType[MVT::f32] = MVT::i32;
+      ValueTypeActions.setTypeAction(MVT::f32, Expand);
+    }
+  }
+  
+  // Loop over all of the vector value types to see which need transformations.
+  for (MVT::ValueType i = MVT::FIRST_VECTOR_VALUETYPE;
+       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+    if (!isTypeLegal(i)) {
+      MVT::ValueType IntermediateVT, RegisterVT;
+      unsigned NumIntermediates;
+      NumRegistersForVT[i] =
+        getVectorTypeBreakdown(i,
+                               IntermediateVT, NumIntermediates,
+                               RegisterVT);
+      RegisterTypeForVT[i] = RegisterVT;
+      TransformToType[i] = MVT::Other; // this isn't actually used
+      ValueTypeActions.setTypeAction(i, Expand);
+    }
+  }
+}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+  return NULL;
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register.  It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(MVT::ValueType VT, 
+                                                MVT::ValueType &IntermediateVT,
+                                                unsigned &NumIntermediates,
+                                      MVT::ValueType &RegisterVT) const {
+  // Figure out the right, legal destination reg to copy into.
+  unsigned NumElts = MVT::getVectorNumElements(VT);
+  MVT::ValueType EltTy = MVT::getVectorElementType(VT);
+  
+  unsigned NumVectorRegs = 1;
+  
+  // Divide the input until we get to a supported size.  This will always
+  // end with a scalar if the target doesn't support vectors.
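+  // For example, if only v4f32 is legal, v8f32 halves once: NumElts drops
+  // from 8 to 4 and NumVectorRegs doubles from 1 to 2.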
+  while (NumElts > 1 &&
+         !isTypeLegal(MVT::getVectorType(EltTy, NumElts))) {
+    NumElts >>= 1;
+    NumVectorRegs <<= 1;
+  }
+
+  NumIntermediates = NumVectorRegs;
+  
+  MVT::ValueType NewVT = MVT::getVectorType(EltTy, NumElts);
+  if (!isTypeLegal(NewVT))
+    NewVT = EltTy;
+  IntermediateVT = NewVT;
+
+  MVT::ValueType DestVT = getTypeToTransformTo(NewVT);
+  RegisterVT = DestVT;
+  if (DestVT < NewVT) {
+    // Value is expanded, e.g. i64 -> i16.
+    return NumVectorRegs*(MVT::getSizeInBits(NewVT)/MVT::getSizeInBits(DestVT));
+  } else {
+    // Otherwise, promotion or legal types use the same number of registers as
+    // the vector decimated to the appropriate level.
+    return NumVectorRegs;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the 
+/// specified instruction is a constant integer.  If so, check to see if there
+/// are any bits set in the constant that are not demanded.  If so, shrink the
+/// constant and return true.
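+/// For example, if Op is (and X, 0xFF) but only the low four bits of the
+/// result are demanded, the constant is shrunk to 0x0F.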
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op, 
+                                                            uint64_t Demanded) {
+  // FIXME: ISD::SELECT, ISD::SELECT_CC
+  switch(Op.getOpcode()) {
+  default: break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+      if ((~Demanded & C->getValue()) != 0) {
+        MVT::ValueType VT = Op.getValueType();
+        SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0),
+                                    DAG.getConstant(Demanded & C->getValue(), 
+                                                    VT));
+        return CombineTo(Op, New);
+      }
+    break;
+  }
+  return false;
+}
+
+/// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream.  If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, returning the original and new nodes in Old and New. Otherwise,
+/// analyze the expression and return a mask of KnownOne and KnownZero bits for
+/// the expression (used to simplify the caller).  The KnownZero/One bits may
+/// only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask, 
+                                          uint64_t &KnownZero,
+                                          uint64_t &KnownOne,
+                                          TargetLoweringOpt &TLO,
+                                          unsigned Depth) const {
+  KnownZero = KnownOne = 0;   // Don't know anything.
+
+  // The masks are not wide enough to represent this type!  Should use APInt.
+  if (Op.getValueType() == MVT::i128)
+    return false;
+  
+  // Other users may use these bits.
+  if (!Op.Val->hasOneUse()) { 
+    if (Depth != 0) {
+      // If not at the root, just compute the KnownZero/KnownOne bits to
+      // simplify things downstream.
+      TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+      return false;
+    }
+    // If this is the root being simplified, allow it to have multiple uses,
+    // just set the DemandedMask to all bits.
+    DemandedMask = MVT::getIntVTBitMask(Op.getValueType());
+  } else if (DemandedMask == 0) {   
+    // Not demanding any bits from Op.
+    if (Op.getOpcode() != ISD::UNDEF)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType()));
+    return false;
+  } else if (Depth == 6) {        // Limit search depth.
+    return false;
+  }
+
+  uint64_t KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask;
+    KnownZero = ~KnownOne & DemandedMask;
+    return false;   // Don't fall through, will infinitely loop.
+  case ISD::AND:
+    // If the RHS is a constant, check to see if the LHS would be zero without
+    // using the bits from the RHS.  Below, we use knowledge about the RHS to
+    // simplify the LHS, here we're using information from the LHS to simplify
+    // the RHS.
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      uint64_t LHSZero, LHSOne;
+      TLO.DAG.ComputeMaskedBits(Op.getOperand(0), DemandedMask,
+                                LHSZero, LHSOne, Depth+1);
+      // If the LHS already has zeros where RHSC does, this and is dead.
+      if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask))
+        return TLO.CombineTo(Op, Op.getOperand(0));
+      // If any of the set bits in the RHS are known zero on the LHS, shrink
+      // the constant.
+      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask))
+        return true;
+    }
+    
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero,
+                             KnownZero2, KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+      
+    // If all of the demanded bits are known one on one side, return the other.
+    // These bits cannot contribute to the result of the 'and'.
+    if ((DemandedMask & ~KnownZero2 & KnownOne)==(DemandedMask & ~KnownZero2))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((DemandedMask & ~KnownZero & KnownOne2)==(DemandedMask & ~KnownZero))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If all of the demanded bits in the inputs are known zeros, return zero.
+    if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask)
+      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+    // If the RHS is a constant, see if we can simplify it.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2))
+      return true;
+      
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 are known to be clear if zero in either the LHS | RHS.
+    KnownZero |= KnownZero2;
+    break;
+  case ISD::OR:
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownOne, 
+                             KnownZero2, KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If all of the demanded bits are known zero on one side, return the other.
+    // These bits cannot contribute to the result of the 'or'.
+    if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If all of the potentially set bits on one side are known to be set on
+    // the other side, just use the 'other' side.
+    if ((DemandedMask & (~KnownZero) & KnownOne2) == 
+        (DemandedMask & (~KnownZero)))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((DemandedMask & (~KnownZero2) & KnownOne) == 
+        (DemandedMask & (~KnownZero2)))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If the RHS is a constant, see if we can simplify it.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+      return true;
+          
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 are known to be set if set in either the LHS | RHS.
+    KnownOne |= KnownOne2;
+    break;
+  case ISD::XOR:
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If all of the demanded bits are known zero on one side, return the other.
+    // These bits cannot contribute to the result of the 'xor'.
+    if ((DemandedMask & KnownZero) == DemandedMask)
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((DemandedMask & KnownZero2) == DemandedMask)
+      return TLO.CombineTo(Op, Op.getOperand(1));
+      
+    // If all of the unknown bits are known to be zero on one side or the other
+    // (but not both) turn this into an *inclusive* or.
+    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+    if ((DemandedMask & ~KnownZero & ~KnownZero2) == 0)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(),
+                                               Op.getOperand(0),
+                                               Op.getOperand(1)));
+    
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 are known to be set if set in only one of the LHS, RHS.
+    KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    
+    // If all of the demanded bits on one side are known, and all of the set
+    // bits on that side are also known to be set on the other side, turn this
+    // into an AND, as we know the bits will be cleared.
+    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+    if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known
+      if ((KnownOne & KnownOne2) == KnownOne) {
+        MVT::ValueType VT = Op.getValueType();
+        SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & DemandedMask, VT);
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0),
+                                                 ANDC));
+      }
+    }
+    
+    // If the RHS is a constant, see if we can simplify it.
+    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+      return true;
+    
+    KnownZero = KnownZeroOut;
+    KnownOne  = KnownOneOut;
+    break;
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
+    if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
+      KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
+    break;
+  case ISD::SELECT:
+    if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If the operands are constants, see if we can simplify them.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+      return true;
+    
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  case ISD::SELECT_CC:
+    if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If the operands are constants, see if we can simplify them.
+    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+      return true;
+      
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  case ISD::SHL:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getValue();
+      SDOperand InOp = Op.getOperand(0);
+
+      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+      // single shift.  We can do this if the bottom bits (which are shifted
+      // out) are never demanded.
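+      // For example, ((X >>u 4) << 8) with no demand on bits 0-7 simplifies
+      // to (X << 4).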
+      if (InOp.getOpcode() == ISD::SRL &&
+          isa<ConstantSDNode>(InOp.getOperand(1))) {
+        if (ShAmt && (DemandedMask & ((1ULL << ShAmt)-1)) == 0) {
+          unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue();
+          unsigned Opc = ISD::SHL;
+          int Diff = ShAmt-C1;
+          if (Diff < 0) {
+            Diff = -Diff;
+            Opc = ISD::SRL;
+          }          
+          
+          SDOperand NewSA = 
+            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+          MVT::ValueType VT = Op.getValueType();
+          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT,
+                                                   InOp.getOperand(0), NewSA));
+        }
+      }      
+      
+      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask >> ShAmt,
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      KnownZero <<= ShAmt;
+      KnownOne  <<= ShAmt;
+      KnownZero |= (1ULL << ShAmt)-1;  // low bits known zero.
+    }
+    break;
+  case ISD::SRL:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+      unsigned VTSize = MVT::getSizeInBits(VT);
+      SDOperand InOp = Op.getOperand(0);
+      
+      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+      // single shift.  We can do this if the top bits (which are shifted out)
+      // are never demanded.
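+      // For example, if the top 4 bits of an i32 are never demanded,
+      // (X << 8) >>u 4 collapses to X << 4; the two differ only in the
+      // bits shifted out the top.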
+      if (InOp.getOpcode() == ISD::SHL &&
+          isa<ConstantSDNode>(InOp.getOperand(1))) {
+        if (ShAmt && (DemandedMask & (~0ULL << (VTSize-ShAmt))) == 0) {
+          unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue();
+          unsigned Opc = ISD::SRL;
+          int Diff = ShAmt-C1;
+          if (Diff < 0) {
+            Diff = -Diff;
+            Opc = ISD::SHL;
+          }          
+          
+          SDOperand NewSA =
+            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT,
+                                                   InOp.getOperand(0), NewSA));
+        }
+      }      
+      
+      // Compute the new bits that are at the top now.
+      if (SimplifyDemandedBits(InOp, (DemandedMask << ShAmt) & TypeMask,
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= VTSize - ShAmt;
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    break;
+  case ISD::SRA:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+      
+      // Compute the new bits that are at the top now.
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+      
+      uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask;
+
+      // If any of the demanded bits are produced by the sign extension, we also
+      // demand the input sign bit.
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
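+      // (HighBits is the mask of result bits filled by the replicated sign
+      // bit, e.g. the top 3 bits of an i32 for an SRA by 3.)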
+      if (HighBits & DemandedMask)
+        InDemandedMask |= MVT::getIntVTSignBit(VT);
+      
+      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+      
+      // Handle the sign bits.
+      uint64_t SignBit = MVT::getIntVTSignBit(VT);
+      SignBit >>= ShAmt;  // Adjust to where it is now in the mask.
+      
+      // If the input sign bit is known to be zero, or if none of the top bits
+      // are demanded, turn this into an unsigned shift right.
+      if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) {
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0),
+                                                 Op.getOperand(1)));
+      } else if (KnownOne & SignBit) { // New bits are known one.
+        KnownOne |= HighBits;
+      }
+    }
+    break;
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+    // Sign extension.  Compute the demanded bits in the result that are not 
+    // present in the input.
+    uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask;
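+    // (For (sext_inreg X, i8) in i32, NewBits is the demanded subset of
+    // bits 8-31.)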
+    
+    // If none of the extended bits are demanded, eliminate the sextinreg.
+    if (NewBits == 0)
+      return TLO.CombineTo(Op, Op.getOperand(0));
+
+    uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
+    uint64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT);
+    
+    // Since some of the sign extended bits are demanded, we know that the
+    // sign bit is demanded.
+    InputDemandedBits |= InSignBit;
+
+    if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+    
+    // If the input sign bit is known zero, convert this into a zero extension.
+    if (KnownZero & InSignBit)
+      return TLO.CombineTo(Op, 
+                           TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT));
+    
+    if (KnownOne & InSignBit) {    // Input sign bit known set
+      KnownOne |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                       // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne &= ~NewBits;
+    }
+    break;
+  }
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP: {
+    MVT::ValueType VT = Op.getValueType();
+    unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
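+    // The count is at most the bit width (e.g. at most 32 for i32), so it
+    // fits in the low Log2_32(width)+1 bits; everything above is zero.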
+    KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
+    KnownOne  = 0;
+    break;
+  }
+  case ISD::LOAD: {
+    if (ISD::isZEXTLoad(Op.Val)) {
+      LoadSDNode *LD = cast<LoadSDNode>(Op);
+      MVT::ValueType VT = LD->getLoadedVT();
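+      // A zero-extending load guarantees that the bits above the loaded
+      // type, e.g. bits 8-31 after a zextload of i8 into i32, are zero.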
+      KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask;
+    }
+    break;
+  }
+  case ISD::ZERO_EXTEND: {
+    uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+    
+    // If none of the top bits are demanded, convert this into an any_extend.
+    uint64_t NewBits = (~InMask) & DemandedMask;
+    if (NewBits == 0)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, 
+                                               Op.getValueType(), 
+                                               Op.getOperand(0)));
+    
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    KnownZero |= NewBits;
+    break;
+  }
+  case ISD::SIGN_EXTEND: {
+    MVT::ValueType InVT = Op.getOperand(0).getValueType();
+    uint64_t InMask    = MVT::getIntVTBitMask(InVT);
+    uint64_t InSignBit = MVT::getIntVTSignBit(InVT);
+    uint64_t NewBits   = (~InMask) & DemandedMask;
+    
+    // If none of the top bits are demanded, convert this into an any_extend.
+    if (NewBits == 0)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,
+                                               Op.getValueType(),
+                                               Op.getOperand(0)));
+    
+    // Since some of the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    uint64_t InDemandedBits = DemandedMask & InMask;
+    InDemandedBits |= InSignBit;
+    
+    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, 
+                             KnownOne, TLO, Depth+1))
+      return true;
+    
+    // If the sign bit is known zero, convert this to a zero extend.
+    if (KnownZero & InSignBit)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, 
+                                               Op.getValueType(), 
+                                               Op.getOperand(0)));
+    
+    // If the sign bit is known one, the top bits match.
+    if (KnownOne & InSignBit) {
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {   // Otherwise, top bits aren't known.
+      KnownOne  &= ~NewBits;
+      KnownZero &= ~NewBits;
+    }
+    break;
+  }
+  case ISD::ANY_EXTEND: {
+    uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    break;
+  }
+  case ISD::TRUNCATE: {
+    // Simplify the input, using demanded bit information, and compute the known
+    // zero/one bits live out.
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    
+    // If the input is only used by this truncate, see if we can shrink it based
+    // on the known demanded bits.
+    if (Op.getOperand(0).Val->hasOneUse()) {
+      SDOperand In = Op.getOperand(0);
+      switch (In.getOpcode()) {
+      default: break;
+      case ISD::SRL:
+        // Shrink SRL by a constant if none of the high bits shifted in are
+        // demanded.
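+        // For example, trunc(X >>u 8) from i64 to i32 becomes
+        // (trunc X) >>u 8 when the top 8 bits of the i32 result are not
+        // demanded.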
+        if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
+          uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType());
+          HighBits &= ~MVT::getIntVTBitMask(Op.getValueType());
+          HighBits >>= ShAmt->getValue();
+          
+          if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) &&
+              (DemandedMask & HighBits) == 0) {
+            // None of the shifted in bits are needed.  Add a truncate of the
+            // shift input, then shift it.
+            SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, 
+                                                 Op.getValueType(), 
+                                                 In.getOperand(0));
+            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(),
+                                                   NewTrunc, In.getOperand(1)));
+          }
+        }
+        break;
+      }
+    }
+    
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
+    KnownZero &= OutMask;
+    KnownOne &= OutMask;
+    break;
+  }
+  case ISD::AssertZext: {
+    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    uint64_t InMask = MVT::getIntVTBitMask(VT);
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    KnownZero |= ~InMask & DemandedMask;
+    break;
+  }
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_VOID:
+    // Just use ComputeMaskedBits to compute output bits.
+    TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+    break;
+  }
+  
+  // If we know the value of all of the demanded bits, return this as a
+  // constant.
+  if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+  
+  return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified 
+/// in Mask are known to be either zero or one and return them in the 
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 
+                                                    uint64_t Mask,
+                                                    uint64_t &KnownZero, 
+                                                    uint64_t &KnownOne,
+                                                    const SelectionDAG &DAG,
+                                                    unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use MaskedValueIsZero if you don't know whether Op"
+         " is a target node!");
+  KnownZero = 0;
+  KnownOne = 0;
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op,
+                                                         unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use ComputeNumSignBits if you don't know whether Op"
+         " is a target node!");
+  return 1;
+}
+
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands 
+/// and cc. If it is unable to simplify it, return a null SDOperand.
+SDOperand
+TargetLowering::SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
+                              ISD::CondCode Cond, bool foldBooleans,
+                              DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+
+  // These setcc operations always fold.
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
+  }
+
+  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val)) {
+    uint64_t C1 = N1C->getValue();
+    if (isa<ConstantSDNode>(N0.Val)) {
+      return DAG.FoldSetCC(VT, N0, N1, Cond);
+    } else {
+      // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+      // equality comparison, then we're just comparing whether X itself is
+      // zero.
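+      // (ctlz X equals the bit width exactly when X is zero, and shifting
+      // the count right by log2(bitwidth) isolates that one case.)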
+      if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+          N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+          N0.getOperand(1).getOpcode() == ISD::Constant) {
+        unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+            ShAmt == Log2_32(MVT::getSizeInBits(N0.getValueType()))) {
+          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+            // (srl (ctlz x), 5) == 0  -> X != 0
+            // (srl (ctlz x), 5) != 1  -> X != 0
+            Cond = ISD::SETNE;
+          } else {
+            // (srl (ctlz x), 5) != 0  -> X == 0
+            // (srl (ctlz x), 5) == 1  -> X == 0
+            Cond = ISD::SETEQ;
+          }
+          SDOperand Zero = DAG.getConstant(0, N0.getValueType());
+          return DAG.getSetCC(VT, N0.getOperand(0).getOperand(0),
+                              Zero, Cond);
+        }
+      }
+      
+      // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+      if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+        unsigned InSize = MVT::getSizeInBits(N0.getOperand(0).getValueType());
+
+        // If the comparison constant has bits in the upper part, the
+        // zero-extended value could never match.
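+        // For example, (zext i8 X to i32) seteq 0x100 can never be true.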
+        if (C1 & (~0ULL << InSize)) {
+          unsigned VSize = MVT::getSizeInBits(N0.getValueType());
+          switch (Cond) {
+          case ISD::SETUGT:
+          case ISD::SETUGE:
+          case ISD::SETEQ: return DAG.getConstant(0, VT);
+          case ISD::SETULT:
+          case ISD::SETULE:
+          case ISD::SETNE: return DAG.getConstant(1, VT);
+          case ISD::SETGT:
+          case ISD::SETGE:
+            // True if the sign bit of C1 is set.
+            return DAG.getConstant((C1 & (1ULL << (VSize-1))) != 0, VT);
+          case ISD::SETLT:
+          case ISD::SETLE:
+            // True if the sign bit of C1 isn't set.
+            return DAG.getConstant((C1 & (1ULL << (VSize-1))) == 0, VT);
+          default:
+            break;
+          }
+        }
+
+        // Otherwise, we can perform the comparison with the low bits.
+        switch (Cond) {
+        case ISD::SETEQ:
+        case ISD::SETNE:
+        case ISD::SETUGT:
+        case ISD::SETUGE:
+        case ISD::SETULT:
+        case ISD::SETULE:
+          return DAG.getSetCC(VT, N0.getOperand(0),
+                          DAG.getConstant(C1, N0.getOperand(0).getValueType()),
+                          Cond);
+        default:
+          break;   // TODO: be more careful with signed comparisons.
+        }
+      } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+        MVT::ValueType ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+        unsigned ExtSrcTyBits = MVT::getSizeInBits(ExtSrcTy);
+        MVT::ValueType ExtDstTy = N0.getValueType();
+        unsigned ExtDstTyBits = MVT::getSizeInBits(ExtDstTy);
+
+        // If the extended part has any inconsistent bits, it cannot ever
+        // compare equal.  In other words, they have to be all ones or all
+        // zeros.
+        uint64_t ExtBits =
+          (~0ULL >> (64-ExtDstTyBits)) & (~0ULL << (ExtSrcTyBits-1));
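+        // (ExtBits covers the replicated sign bits together with the source
+        // sign bit, e.g. bits 7-31 for a sext_inreg from i8 in i32.)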
+        if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+          return DAG.getConstant(Cond == ISD::SETNE, VT);
+        
+        SDOperand ZextOp;
+        MVT::ValueType Op0Ty = N0.getOperand(0).getValueType();
+        if (Op0Ty == ExtSrcTy) {
+          ZextOp = N0.getOperand(0);
+        } else {
+          int64_t Imm = ~0ULL >> (64-ExtSrcTyBits);
+          ZextOp = DAG.getNode(ISD::AND, Op0Ty, N0.getOperand(0),
+                               DAG.getConstant(Imm, Op0Ty));
+        }
+        if (!DCI.isCalledByLegalizer())
+          DCI.AddToWorklist(ZextOp.Val);
+        // Otherwise, make this a use of a zext.
+        return DAG.getSetCC(VT, ZextOp, 
+                            DAG.getConstant(C1 & (~0ULL>>(64-ExtSrcTyBits)), 
+                                            ExtDstTy),
+                            Cond);
+      } else if ((N1C->getValue() == 0 || N1C->getValue() == 1) &&
+                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+        
+        // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
+        if (N0.getOpcode() == ISD::SETCC) {
+          bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getValue() != 1);
+          if (TrueWhenTrue)
+            return N0;
+          
+          // Invert the condition.
+          ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+          CC = ISD::getSetCCInverse(CC, 
+                               MVT::isInteger(N0.getOperand(0).getValueType()));
+          return DAG.getSetCC(VT, N0.getOperand(0), N0.getOperand(1), CC);
+        }
+        
+        if ((N0.getOpcode() == ISD::XOR ||
+             (N0.getOpcode() == ISD::AND && 
+              N0.getOperand(0).getOpcode() == ISD::XOR &&
+              N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+            isa<ConstantSDNode>(N0.getOperand(1)) &&
+            cast<ConstantSDNode>(N0.getOperand(1))->getValue() == 1) {
+          // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
+          // can only do this if the top bits are known zero.
+          if (DAG.MaskedValueIsZero(N0,
+                                    MVT::getIntVTBitMask(N0.getValueType())-1)){
+            // Okay, get the un-inverted input value.
+            SDOperand Val;
+            if (N0.getOpcode() == ISD::XOR)
+              Val = N0.getOperand(0);
+            else {
+              assert(N0.getOpcode() == ISD::AND && 
+                     N0.getOperand(0).getOpcode() == ISD::XOR);
+              // ((X^1)&1)^1 -> X & 1
+              Val = DAG.getNode(ISD::AND, N0.getValueType(),
+                                N0.getOperand(0).getOperand(0),
+                                N0.getOperand(1));
+            }
+            return DAG.getSetCC(VT, Val, N1,
+                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+          }
+        }
+      }
+      
+      uint64_t MinVal, MaxVal;
+      unsigned OperandBitSize = MVT::getSizeInBits(N1C->getValueType(0));
+      if (ISD::isSignedIntSetCC(Cond)) {
+        MinVal = 1ULL << (OperandBitSize-1);
+        if (OperandBitSize != 1)   // Avoid X >> 64, which is undefined.
+          MaxVal = ~0ULL >> (65-OperandBitSize);
+        else
+          MaxVal = 0;
+      } else {
+        MinVal = 0;
+        MaxVal = ~0ULL >> (64-OperandBitSize);
+      }
+
+      // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+      if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+        if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
+        --C1;                                          // X >= C0 --> X > (C0-1)
+        return DAG.getSetCC(VT, N0, DAG.getConstant(C1, N1.getValueType()),
+                        (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+      }
+
+      if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+        if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
+        ++C1;                                          // X <= C0 --> X < (C0+1)
+        return DAG.getSetCC(VT, N0, DAG.getConstant(C1, N1.getValueType()),
+                        (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+      }
+
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+        return DAG.getConstant(0, VT);      // X < MIN --> false
+      if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+        return DAG.getConstant(1, VT);      // X >= MIN --> true
+      if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+        return DAG.getConstant(0, VT);      // X > MAX --> false
+      if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+        return DAG.getConstant(1, VT);      // X <= MAX --> true
+
+      // Canonicalize setgt X, Min --> setne X, Min
+      if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+        return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
+      // Canonicalize setlt X, Max --> setne X, Max
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+        return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
+
+      // If we have setult X, 1, turn it into seteq X, 0
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+        return DAG.getSetCC(VT, N0, DAG.getConstant(MinVal, N0.getValueType()),
+                        ISD::SETEQ);
+      // If we have setugt X, Max-1, turn it into seteq X, Max
+      else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+        return DAG.getSetCC(VT, N0, DAG.getConstant(MaxVal, N0.getValueType()),
+                        ISD::SETEQ);
+
+      // If we have "setcc X, C0", check to see if we can shrink the immediate
+      // by changing cc.
+
+      // SETUGT X, SINTMAX  -> SETLT X, 0
+      if (Cond == ISD::SETUGT && OperandBitSize != 1 &&
+          C1 == (~0ULL >> (65-OperandBitSize)))
+        return DAG.getSetCC(VT, N0, DAG.getConstant(0, N1.getValueType()),
+                            ISD::SETLT);
+
+      // FIXME: Implement the rest of these.
+
+      // Fold bit comparisons when we can.
+      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+          VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+        if (ConstantSDNode *AndRHS =
+                    dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+          if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
+            // Perform the xform if the AND RHS is a single bit.
+            if (isPowerOf2_64(AndRHS->getValue())) {
+              return DAG.getNode(ISD::SRL, VT, N0,
+                             DAG.getConstant(Log2_64(AndRHS->getValue()),
+                                             getShiftAmountTy()));
+            }
+          } else if (Cond == ISD::SETEQ && C1 == AndRHS->getValue()) {
+            // (X & 8) == 8  -->  (X & 8) >> 3
+            // Perform the xform if C1 is a single bit.
+            if (isPowerOf2_64(C1)) {
+              return DAG.getNode(ISD::SRL, VT, N0,
+                          DAG.getConstant(Log2_64(C1), getShiftAmountTy()));
+            }
+          }
+        }
+    }
+  } else if (isa<ConstantSDNode>(N0.Val)) {
+    // Ensure that the constant occurs on the RHS.
+    return DAG.getSetCC(VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+  }
+
+  if (isa<ConstantFPSDNode>(N0.Val)) {
+    // Constant fold or commute setcc.
+    SDOperand O = DAG.FoldSetCC(VT, N0, N1, Cond);    
+    if (O.Val) return O;
+  }
+
+  if (N0 == N1) {
+    // We can always fold X == X for integer setcc's.
+    if (MVT::isInteger(N0.getValueType()))
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    unsigned UOF = ISD::getUnorderedFlavor(Cond);
+    if (UOF == 2)   // FP operators that are undefined on NaNs.
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+      return DAG.getConstant(UOF, VT);
+    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
+    // if it is not already.
+    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+    if (NewCond != Cond)
+      return DAG.getSetCC(VT, N0, N1, NewCond);
+  }
+
+  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+      MVT::isInteger(N0.getValueType())) {
+    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+        N0.getOpcode() == ISD::XOR) {
+      // Simplify (X+Y) == (X+Z) -->  Y == Z
+      if (N0.getOpcode() == N1.getOpcode()) {
+        if (N0.getOperand(0) == N1.getOperand(0))
+          return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(1), Cond);
+        if (N0.getOperand(1) == N1.getOperand(1))
+          return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(0), Cond);
+        if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+          // If X op Y == Y op X, try other combinations.
+          if (N0.getOperand(0) == N1.getOperand(1))
+            return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(0), Cond);
+          if (N0.getOperand(1) == N1.getOperand(0))
+            return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(1), Cond);
+        }
+      }
+      
+      if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+        if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+          // Turn (X+C1) == C2 --> X == C2-C1
+          if (N0.getOpcode() == ISD::ADD && N0.Val->hasOneUse()) {
+            return DAG.getSetCC(VT, N0.getOperand(0),
+                              DAG.getConstant(RHSC->getValue()-LHSR->getValue(),
+                                N0.getValueType()), Cond);
+          }
+          
+          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+          if (N0.getOpcode() == ISD::XOR)
+            // If we know that all of the inverted bits are zero, don't bother
+            // performing the inversion.
+            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getValue()))
+              return DAG.getSetCC(VT, N0.getOperand(0),
+                              DAG.getConstant(LHSR->getValue()^RHSC->getValue(),
+                                              N0.getValueType()), Cond);
+        }
+        
+        // Turn (C1-X) == C2 --> X == C1-C2
+        if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+          if (N0.getOpcode() == ISD::SUB && N0.Val->hasOneUse()) {
+            return DAG.getSetCC(VT, N0.getOperand(1),
+                             DAG.getConstant(SUBC->getValue()-RHSC->getValue(),
+                                             N0.getValueType()), Cond);
+          }
+        }          
+      }
+
+      // Simplify (X+Z) == X -->  Z == 0
+      if (N0.getOperand(0) == N1)
+        return DAG.getSetCC(VT, N0.getOperand(1),
+                        DAG.getConstant(0, N0.getValueType()), Cond);
+      if (N0.getOperand(1) == N1) {
+        if (DAG.isCommutativeBinOp(N0.getOpcode()))
+          return DAG.getSetCC(VT, N0.getOperand(0),
+                          DAG.getConstant(0, N0.getValueType()), Cond);
+        else if (N0.Val->hasOneUse()) {
+          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // (Z-X) == X  --> Z == X<<1
+          SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(),
+                                     N1, 
+                                     DAG.getConstant(1, getShiftAmountTy()));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.Val);
+          return DAG.getSetCC(VT, N0.getOperand(0), SH, Cond);
+        }
+      }
+    }
+
+    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+        N1.getOpcode() == ISD::XOR) {
+      // Simplify  X == (X+Z) -->  Z == 0
+      if (N1.getOperand(0) == N0) {
+        return DAG.getSetCC(VT, N1.getOperand(1),
+                        DAG.getConstant(0, N1.getValueType()), Cond);
+      } else if (N1.getOperand(1) == N0) {
+        if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+          return DAG.getSetCC(VT, N1.getOperand(0),
+                          DAG.getConstant(0, N1.getValueType()), Cond);
+        } else if (N1.Val->hasOneUse()) {
+          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // X == (Z-X)  --> X<<1 == Z
+          SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(), N0, 
+                                     DAG.getConstant(1, getShiftAmountTy()));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.Val);
+          return DAG.getSetCC(VT, SH, N1.getOperand(0), Cond);
+        }
+      }
+    }
+  }
+
+  // Fold away ALL boolean setcc's.
+  SDOperand Temp;
+  if (N0.getValueType() == MVT::i1 && foldBooleans) {
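+    // Note that in i1 arithmetic the bit pattern 1 reads as signed -1,
+    // which is why the signed and unsigned condition codes below pair up
+    // the way they do.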
+    switch (Cond) {
+    default: assert(0 && "Unknown integer setcc!");
+    case ISD::SETEQ:  // X == Y  -> (X^Y)^1
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, N1);
+      N0 = DAG.getNode(ISD::XOR, MVT::i1, Temp, DAG.getConstant(1, MVT::i1));
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.Val);
+      break;
+    case ISD::SETNE:  // X != Y   -->  (X^Y)
+      N0 = DAG.getNode(ISD::XOR, MVT::i1, N0, N1);
+      break;
+    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  X^1 & Y
+    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  X^1 & Y
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1));
+      N0 = DAG.getNode(ISD::AND, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.Val);
+      break;
+    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  Y^1 & X
+    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  Y^1 & X
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1));
+      N0 = DAG.getNode(ISD::AND, MVT::i1, N0, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.Val);
+      break;
+    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  X^1 | Y
+    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  X^1 | Y
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1));
+      N0 = DAG.getNode(ISD::OR, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.Val);
+      break;
+    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  Y^1 | X
+    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  Y^1 | X
+      Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1));
+      N0 = DAG.getNode(ISD::OR, MVT::i1, N0, Temp);
+      break;
+    }
+    if (VT != MVT::i1) {
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(N0.Val);
+      // FIXME: If running after legalize, we probably can't do this.
+      N0 = DAG.getNode(ISD::ZERO_EXTEND, VT, N0);
+    }
+    return N0;
+  }
+
+  // Could not fold it.
+  return SDOperand();
+}
+
+SDOperand TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+  // Default implementation: no optimization.
+  return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+//  Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+  // FIXME: lots more standard ones to handle.
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default: break;
+    case 'r': return C_RegisterClass;
+    case 'm':    // memory
+    case 'o':    // offsetable
+    case 'V':    // not offsetable
+      return C_Memory;
+    case 'i':    // Simple Integer or Relocatable Constant
+    case 'n':    // Simple Integer
+    case 's':    // Relocatable Constant
+    case 'X':    // Allow ANY value.
+    case 'I':    // Target registers.
+    case 'J':
+    case 'K':
+    case 'L':
+    case 'M':
+    case 'N':
+    case 'O':
+    case 'P':
+      return C_Other;
+    }
+  }
+  
+  if (Constraint.size() > 1 && Constraint[0] == '{' && 
+      Constraint[Constraint.size()-1] == '}')
+    return C_Register;
+  return C_Unknown;
+}
+
+/// isOperandValidForConstraint - Return the specified operand (possibly
+/// modified) if the specified SDOperand is valid for the specified target
+/// constraint letter, otherwise return null.
+SDOperand TargetLowering::isOperandValidForConstraint(SDOperand Op,
+                                                      char ConstraintLetter,
+                                                      SelectionDAG &DAG) {
+  switch (ConstraintLetter) {
+  default: break;
+  case 'i':    // Simple Integer or Relocatable Constant
+  case 'n':    // Simple Integer
+  case 's':    // Relocatable Constant
+  case 'X': {  // Allows any operand.
+    // These operands are interested in values of the form (GV+C), where C may
+    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
+    // is possible and fine if either GV or C are missing.
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+    
+    // If we have "(add GV, C)", pull out GV/C
+    if (Op.getOpcode() == ISD::ADD) {
+      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+      if (C == 0 || GA == 0) {
+        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+      }
+      if (C == 0 || GA == 0)
+        C = 0, GA = 0;
+    }
+    
+    // If we find a valid operand, map to the TargetXXX version so that the
+    // value itself doesn't get selected.
+    if (GA) {   // Either &GV   or   &GV+C
+      if (ConstraintLetter != 'n') {
+        int64_t Offs = GA->getOffset();
+        if (C) Offs += C->getValue();
+        return DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getValueType(),
+                                          Offs);
+      }
+    }
+    if (C) {   // just C, no GV.
+      // Simple constants are not allowed for 's'.
+      if (ConstraintLetter != 's')
+        return DAG.getTargetConstant(C->getValue(), Op.getValueType());
+    }
+    break;
+  }
+  }
+  return SDOperand(0,0);
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  MVT::ValueType VT) const {
+  return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+                             MVT::ValueType VT) const {
+  if (Constraint[0] != '{')
+    return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+  // Remove the braces from around the name.
+  std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+
+  // Figure out which register class contains this reg.
+  const MRegisterInfo *RI = TM.getRegisterInfo();
+  for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+       E = RI->regclass_end(); RCI != E; ++RCI) {
+    const TargetRegisterClass *RC = *RCI;
+    
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    bool isLegal = false;
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (isTypeLegal(*I)) {
+        isLegal = true;
+        break;
+      }
+    }
+    
+    if (!isLegal) continue;
+    
+    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); 
+         I != E; ++I) {
+      if (StringsEqualNoCase(RegName, RI->get(*I).Name))
+        return std::make_pair(*I, RC);
+    }
+  }
+  
+  return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+//  Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, 
+                                           const Type *Ty) const {
+  // The default implementation supports a conservative RISC-style r+r and
+  // r+i addressing mode.
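+  // In practice this accepts [r], [r+r], and [r+imm16]-style addresses and
+  // rejects globals and scaled indexing beyond 2*r.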
+
+  // Allow a sign-extended 16-bit immediate field.
+  if (AM.BaseOffs < -(1LL << 15) || AM.BaseOffs > (1LL << 15)-1)
+    return false;
+  
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+  
+  // Only support r+i, r+r, and 2*r (folded as r+r) addressing.
+  switch (AM.Scale) {
+  case 0:  // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
+      return false;
+    // Otherwise we have r+r or r+i.
+    break;
+  case 2:
+    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
+      return false;
+    // Allow 2*r as r+r.
+    break;
+  default:  // Don't allow n*r for any other scale.
+    return false;
+  }
+  
+  return true;
+}
+
+// Magic for divide replacement
+
+struct ms {
+  int64_t m;  // magic number
+  int64_t s;  // shift amount
+};
+
+struct mu {
+  uint64_t m; // magic number
+  int64_t a;  // add indicator
+  int64_t s;  // shift amount
+};
+
+/// magic - calculate the magic numbers required to codegen an integer sdiv as
+/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
+/// or -1.
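+/// (The derivation follows the signed-division magic number algorithm
+/// from Hacker's Delight.)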
+static ms magic32(int32_t d) {
+  int32_t p;
+  uint32_t ad, anc, delta, q1, r1, q2, r2, t;
+  const uint32_t two31 = 0x80000000U;
+  struct ms mag;
+  
+  ad = abs(d);
+  t = two31 + ((uint32_t)d >> 31);
+  anc = t - 1 - t%ad;   // absolute value of nc
+  p = 31;               // initialize p
+  q1 = two31/anc;       // initialize q1 = 2^p/abs(nc)
+  r1 = two31 - q1*anc;  // initialize r1 = rem(2^p,abs(nc))
+  q2 = two31/ad;        // initialize q2 = 2^p/abs(d)
+  r2 = two31 - q2*ad;   // initialize r2 = rem(2^p,abs(d))
+  do {
+    p = p + 1;
+    q1 = 2*q1;        // update q1 = 2^p/abs(nc)
+    r1 = 2*r1;        // update r1 = rem(2^p,abs(nc))
+    if (r1 >= anc) {  // must be unsigned comparison
+      q1 = q1 + 1;
+      r1 = r1 - anc;
+    }
+    q2 = 2*q2;        // update q2 = 2^p/abs(d)
+    r2 = 2*r2;        // update r2 = rem(2^p,abs(d))
+    if (r2 >= ad) {   // must be unsigned comparison
+      q2 = q2 + 1;
+      r2 = r2 - ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+  
+  mag.m = (int32_t)(q2 + 1); // make sure to sign extend
+  if (d < 0) mag.m = -mag.m; // resulting magic number
+  mag.s = p - 32;            // resulting shift
+  return mag;
+}
+
+/// magicu - calculate the magic numbers required to codegen an integer udiv as
+/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
+static mu magicu32(uint32_t d) {
+  int32_t p;
+  uint32_t nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  nc = -1 - (-d)%d;
+  p = 31;                   // initialize p
+  q1 = 0x80000000/nc;       // initialize q1 = 2^p/nc
+  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2^p,nc)
+  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2^p-1)/d
+  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2^p-1),d)
+  do {
+    p = p + 1;
+    if (r1 >= nc - r1) {
+      q1 = 2*q1 + 1;  // update q1
+      r1 = 2*r1 - nc; // update r1
+    }
+    else {
+      q1 = 2*q1; // update q1
+      r1 = 2*r1; // update r1
+    }
+    if (r2 + 1 >= d - r2) {
+      if (q2 >= 0x7FFFFFFF) magu.a = 1;
+      q2 = 2*q2 + 1;     // update q2
+      r2 = 2*r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2 >= 0x80000000) magu.a = 1;
+      q2 = 2*q2;     // update q2
+      r2 = 2*r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - 32;  // resulting shift
+  return magu;
+}
+
+/// magic - calculate the magic numbers required to codegen an integer sdiv as
+/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
+/// or -1.
+static ms magic64(int64_t d) {
+  int64_t p;
+  uint64_t ad, anc, delta, q1, r1, q2, r2, t;
+  const uint64_t two63 = 9223372036854775808ULL; // 2^63
+  struct ms mag;
+  
+  ad = d >= 0 ? d : -d;
+  t = two63 + ((uint64_t)d >> 63);
+  anc = t - 1 - t%ad;   // absolute value of nc
+  p = 63;               // initialize p
+  q1 = two63/anc;       // initialize q1 = 2^p/abs(nc)
+  r1 = two63 - q1*anc;  // initialize r1 = rem(2^p,abs(nc))
+  q2 = two63/ad;        // initialize q2 = 2^p/abs(d)
+  r2 = two63 - q2*ad;   // initialize r2 = rem(2^p,abs(d))
+  do {
+    p = p + 1;
+    q1 = 2*q1;        // update q1 = 2^p/abs(nc)
+    r1 = 2*r1;        // update r1 = rem(2^p,abs(nc))
+    if (r1 >= anc) {  // must be unsigned comparison
+      q1 = q1 + 1;
+      r1 = r1 - anc;
+    }
+    q2 = 2*q2;        // update q2 = 2^p/abs(d)
+    r2 = 2*r2;        // update r2 = rem(2^p,abs(d))
+    if (r2 >= ad) {   // must be unsigned comparison
+      q2 = q2 + 1;
+      r2 = r2 - ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+  
+  mag.m = q2 + 1;
+  if (d < 0) mag.m = -mag.m; // resulting magic number
+  mag.s = p - 64;            // resulting shift
+  return mag;
+}
+
+/// magicu - calculate the magic numbers required to codegen an integer udiv as
+/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
+static mu magicu64(uint64_t d)
+{
+  int64_t p;
+  uint64_t nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  nc = -1 - (-d)%d;
+  p = 63;                   // initialize p
+  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2^p/nc
+  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2^p,nc)
+  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2^p-1)/d
+  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2^p-1),d)
+  do {
+    p = p + 1;
+    if (r1 >= nc - r1) {
+      q1 = 2*q1 + 1;  // update q1
+      r1 = 2*r1 - nc; // update r1
+    }
+    else {
+      q1 = 2*q1; // update q1
+      r1 = 2*r1; // update r1
+    }
+    if (r2 + 1 >= d - r2) {
+      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
+      q2 = 2*q2 + 1;     // update q2
+      r2 = 2*r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2 >= 0x8000000000000000ull) magu.a = 1;
+      q2 = 2*q2;     // update q2
+      r2 = 2*r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - 64;  // resulting shift
+  return magu;
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
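+/// For example, an i32 sdiv by 5 becomes a MULHS by 0x66666667, an
+/// arithmetic shift right by 1, and a sign-bit correction.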
+SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, 
+                                    std::vector<SDNode*>* Created) const {
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // Check to see if we can do this.
+  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+    return SDOperand();       // BuildSDIV only operates on i32 or i64
+  if (!isOperationLegal(ISD::MULHS, VT))
+    return SDOperand();       // Make sure the target supports MULHS.
+  
+  int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
+  ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);
+  
+  // Multiply the numerator (operand 0) by the magic value
+  SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
+                            DAG.getConstant(magics.m, VT));
+  // If d > 0 and m < 0, add the numerator
+  if (d > 0 && magics.m < 0) { 
+    Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.Val);
+  }
+  // If d < 0 and m > 0, subtract the numerator.
+  if (d < 0 && magics.m > 0) {
+    Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.Val);
+  }
+  // Shift right algebraic if shift value is nonzero
+  if (magics.s > 0) {
+    Q = DAG.getNode(ISD::SRA, VT, Q, 
+                    DAG.getConstant(magics.s, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(Q.Val);
+  }
+  // Extract the sign bit and add it to the quotient
+  SDOperand T =
+    DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1,
+                                                 getShiftAmountTy()));
+  if (Created)
+    Created->push_back(T.Val);
+  return DAG.getNode(ISD::ADD, VT, Q, T);
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
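+/// For example, an i32 udiv by 7 yields magic 0x24924925 with the add
+/// indicator set, taking the subtract/shift/add fixup path below.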
+SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+                                    std::vector<SDNode*>* Created) const {
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // Check to see if we can do this.
+  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+    return SDOperand();       // BuildUDIV only operates on i32 or i64
+  if (!isOperationLegal(ISD::MULHU, VT))
+    return SDOperand();       // Make sure the target supports MULHU.
+  
+  uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue();
+  mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
+  
+  // Multiply the numerator (operand 0) by the magic value
+  SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
+                            DAG.getConstant(magics.m, VT));
+  if (Created)
+    Created->push_back(Q.Val);
+
+  if (magics.a == 0) {
+    return DAG.getNode(ISD::SRL, VT, Q, 
+                       DAG.getConstant(magics.s, getShiftAmountTy()));
+  } else {
+    SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
+    if (Created)
+      Created->push_back(NPQ.Val);
+    NPQ = DAG.getNode(ISD::SRL, VT, NPQ, 
+                      DAG.getConstant(1, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(NPQ.Val);
+    NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q);
+    if (Created)
+      Created->push_back(NPQ.Val);
+    return DAG.getNode(ISD::SRL, VT, NPQ, 
+                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
+  }
+}