Sync upstream to r102410.
Re-turn on sdk.

Change-Id: I91a890863989a67243b4d2dfd1ae09b843ebaeaf
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 8840622..4008a6a 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -31,12 +31,12 @@
 // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
 static cl::opt<int>
 DebugDiv("agg-antidep-debugdiv",
-                      cl::desc("Debug control for aggressive anti-dep breaker"),
-                      cl::init(0), cl::Hidden);
+         cl::desc("Debug control for aggressive anti-dep breaker"),
+         cl::init(0), cl::Hidden);
 static cl::opt<int>
 DebugMod("agg-antidep-debugmod",
-                      cl::desc("Debug control for aggressive anti-dep breaker"),
-                      cl::init(0), cl::Hidden);
+         cl::desc("Debug control for aggressive anti-dep breaker"),
+         cl::init(0), cl::Hidden);
 
 AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
                                                MachineBasicBlock *BB) :
@@ -210,7 +210,7 @@
 }
 
 void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
-                                     unsigned InsertPosIndex) {
+                                       unsigned InsertPosIndex) {
   assert(Count < InsertPosIndex && "Instruction index out of expected range!");
 
   std::set<unsigned> PassthruRegs;
@@ -244,7 +244,7 @@
 }
 
 bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
-                                            MachineOperand& MO)
+                                                MachineOperand& MO)
 {
   if (!MO.isReg() || !MO.isImplicit())
     return false;
@@ -281,9 +281,9 @@
 
 /// AntiDepEdges - Return in Edges the anti- and output- dependencies
 /// in SU that we want to consider for breaking.
-static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) {
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
   SmallSet<unsigned, 4> RegSet;
-  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+  for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
        P != PE; ++P) {
     if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
       unsigned Reg = P->getReg();
@@ -297,14 +297,14 @@
 
 /// CriticalPathStep - Return the next SUnit after SU on the bottom-up
 /// critical path.
-static SUnit *CriticalPathStep(SUnit *SU) {
-  SDep *Next = 0;
+static const SUnit *CriticalPathStep(const SUnit *SU) {
+  const SDep *Next = 0;
   unsigned NextDepth = 0;
   // Find the predecessor edge with the greatest depth.
   if (SU != 0) {
-    for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+    for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
          P != PE; ++P) {
-      SUnit *PredSU = P->getSUnit();
+      const SUnit *PredSU = P->getSUnit();
       unsigned PredLatency = P->getLatency();
       unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
       // In the case of a latency tie, prefer an anti-dependency edge over
@@ -359,8 +359,7 @@
 
 void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
                                                   unsigned Count,
-                                              std::set<unsigned>& PassthruRegs)
-{
+                                             std::set<unsigned>& PassthruRegs) {
   unsigned *DefIndices = State->GetDefIndices();
   std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
     RegRefs = State->GetRegRefs();
@@ -439,7 +438,7 @@
 }
 
 void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
-                                           unsigned Count) {
+                                               unsigned Count) {
   DEBUG(dbgs() << "\tUse Groups:");
   std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
     RegRefs = State->GetRegRefs();
@@ -704,9 +703,9 @@
 /// ScheduleDAG and break them by renaming registers.
 ///
 unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
-                              std::vector<SUnit>& SUnits,
-                              MachineBasicBlock::iterator& Begin,
-                              MachineBasicBlock::iterator& End,
+                              const std::vector<SUnit>& SUnits,
+                              MachineBasicBlock::iterator Begin,
+                              MachineBasicBlock::iterator End,
                               unsigned InsertPosIndex) {
   unsigned *KillIndices = State->GetKillIndices();
   unsigned *DefIndices = State->GetDefIndices();
@@ -721,20 +720,21 @@
   RenameOrderType RenameOrder;
 
   // ...need a map from MI to SUnit.
-  std::map<MachineInstr *, SUnit *> MISUnitMap;
+  std::map<MachineInstr *, const SUnit *> MISUnitMap;
   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
-    SUnit *SU = &SUnits[i];
-    MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU));
+    const SUnit *SU = &SUnits[i];
+    MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
+                                                               SU));
   }
 
   // Track progress along the critical path through the SUnit graph as
   // we walk the instructions. This is needed for regclasses that only
   // break critical-path anti-dependencies.
-  SUnit *CriticalPathSU = 0;
+  const SUnit *CriticalPathSU = 0;
   MachineInstr *CriticalPathMI = 0;
   if (CriticalPathSet.any()) {
     for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
-      SUnit *SU = &SUnits[i];
+      const SUnit *SU = &SUnits[i];
       if (!CriticalPathSU ||
           ((SU->getDepth() + SU->Latency) >
            (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
@@ -775,8 +775,8 @@
 
     // The dependence edges that represent anti- and output-
     // dependencies that are candidates for breaking.
-    std::vector<SDep*> Edges;
-    SUnit *PathSU = MISUnitMap[MI];
+    std::vector<const SDep *> Edges;
+    const SUnit *PathSU = MISUnitMap[MI];
     AntiDepEdges(PathSU, Edges);
 
     // If MI is not on the critical path, then we don't rename
@@ -794,7 +794,7 @@
     if (!MI->isKill()) {
       // Attempt to break each anti-dependency...
       for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
-        SDep *Edge = Edges[i];
+        const SDep *Edge = Edges[i];
         SUnit *NextSU = Edge->getSUnit();
 
         if ((Edge->getKind() != SDep::Anti) &&
@@ -838,7 +838,7 @@
           // Also, if there are dependencies on other SUnits with the
           // same register as the anti-dependency, don't attempt to
           // break it.
-          for (SUnit::pred_iterator P = PathSU->Preds.begin(),
+          for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
                  PE = PathSU->Preds.end(); P != PE; ++P) {
             if (P->getSUnit() == NextSU ?
                 (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
@@ -847,7 +847,7 @@
               break;
             }
           }
-          for (SUnit::pred_iterator P = PathSU->Preds.begin(),
+          for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
                  PE = PathSU->Preds.end(); P != PE; ++P) {
             if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
                 (P->getKind() != SDep::Output)) {
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index a62d68c..506d43e 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -142,9 +142,9 @@
     /// path
     /// of the ScheduleDAG and break them by renaming registers.
     ///
-    unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
-                                   MachineBasicBlock::iterator& Begin,
-                                   MachineBasicBlock::iterator& End,
+    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                   MachineBasicBlock::iterator Begin,
+                                   MachineBasicBlock::iterator End,
                                    unsigned InsertPosIndex);
 
     /// Observe - Update liveness information to account for the current
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
new file mode 100644
index 0000000..f71eee5
--- /dev/null
+++ b/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,285 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+                                  const unsigned *Indices,
+                                  const unsigned *IndicesEnd,
+                                  unsigned CurIndex) {
+  // Base case: We're done.
+  if (Indices && Indices == IndicesEnd)
+    return CurIndex;
+
+  // Given a struct type, recursively traverse the elements.
+  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (StructType::element_iterator EB = STy->element_begin(),
+                                      EI = EB,
+                                      EE = STy->element_end();
+        EI != EE; ++EI) {
+      if (Indices && *Indices == unsigned(EI - EB))
+        return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // Given an array type, recursively traverse the elements.
+  else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    const Type *EltTy = ATy->getElementType();
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+      if (Indices && *Indices == i)
+        return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // We haven't found the type we're looking for, so keep searching.
+  return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+                           SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<uint64_t> *Offsets,
+                           uint64_t StartingOffset) {
+  // Given a struct type, recursively traverse the elements.
+  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+    for (StructType::element_iterator EB = STy->element_begin(),
+                                      EI = EB,
+                                      EE = STy->element_end();
+         EI != EE; ++EI)
+      ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+                      StartingOffset + SL->getElementOffset(EI - EB));
+    return;
+  }
+  // Given an array type, recursively traverse the elements.
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    const Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+      ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+                      StartingOffset + i * EltSize);
+    return;
+  }
+  // Interpret void as zero return values.
+  if (Ty->isVoidTy())
+    return;
+  // Base case: we can get an EVT for this LLVM IR type.
+  ValueVTs.push_back(TLI.getValueType(Ty));
+  if (Offsets)
+    Offsets->push_back(StartingOffset);
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+  V = V->stripPointerCasts();
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+
+  if (GV && GV->getName() == ".llvm.eh.catch.all.value") {
+    assert(GV->hasInitializer() &&
+           "The EH catch-all value must have an initializer");
+    Value *Init = GV->getInitializer();
+    GV = dyn_cast<GlobalVariable>(Init);
+    if (!GV) V = cast<ConstantPointerNull>(Init);
+  }
+
+  assert((GV || isa<ConstantPointerNull>(V)) &&
+         "TypeInfo must be a global variable or NULL");
+  return GV;
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool
+llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+                                const TargetLowering &TLI) {
+  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+    InlineAsm::ConstraintInfo &CI = CInfos[i];
+    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+      if (CType == TargetLowering::C_Memory)
+        return true;
+    }
+
+    // Indirect operand accesses access memory.
+    if (CI.isIndirect)
+      return true;
+  }
+
+  return false;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code.  This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
+  ISD::CondCode FPC, FOC;
+  switch (Pred) {
+  case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+  case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+  case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+  case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+  case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+  case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+  case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+  case FCmpInst::FCMP_ORD:   FOC = FPC = ISD::SETO;   break;
+  case FCmpInst::FCMP_UNO:   FOC = FPC = ISD::SETUO;  break;
+  case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+  case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+  case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+  case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+  case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+  case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+  case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
+  default:
+    llvm_unreachable("Invalid FCmp predicate opcode!");
+    FOC = FPC = ISD::SETFALSE;
+    break;
+  }
+  if (FiniteOnlyFPMath())
+    return FOC;
+  else
+    return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
+  switch (Pred) {
+  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
+  case ICmpInst::ICMP_NE:  return ISD::SETNE;
+  case ICmpInst::ICMP_SLE: return ISD::SETLE;
+  case ICmpInst::ICMP_ULE: return ISD::SETULE;
+  case ICmpInst::ICMP_SGE: return ISD::SETGE;
+  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+  case ICmpInst::ICMP_SLT: return ISD::SETLT;
+  case ICmpInst::ICMP_ULT: return ISD::SETULT;
+  case ICmpInst::ICMP_SGT: return ISD::SETGT;
+  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+  default:
+    llvm_unreachable("Invalid ICmp predicate opcode!");
+    return ISD::SETNE;
+  }
+}
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
+                                const TargetLowering &TLI) {
+  const Instruction *I = CS.getInstruction();
+  const BasicBlock *ExitBB = I->getParent();
+  const TerminatorInst *Term = ExitBB->getTerminator();
+  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+  const Function *F = ExitBB->getParent();
+
+  // The block must end in a return statement or unreachable.
+  //
+  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+  // an unreachable, for now. The way tailcall optimization is currently
+  // implemented means it will add an epilogue followed by a jump. That is
+  // not profitable. Also, if the callee is a special function (e.g.
+  // longjmp on x86), it can end up causing miscompilation that has not
+  // been fully understood.
+  if (!Ret &&
+      (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+
+  // If I will have a chain, make sure no other instruction that will have a
+  // chain interposes between I and the return.
+  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+      !I->isSafeToSpeculativelyExecute())
+    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+         --BBI) {
+      if (&*BBI == I)
+        break;
+      // Debug info intrinsics do not get in the way of tail call optimization.
+      if (isa<DbgInfoIntrinsic>(BBI))
+        continue;
+      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+          !BBI->isSafeToSpeculativelyExecute())
+        return false;
+    }
+
+  // If the block ends with a void return or unreachable, it doesn't matter
+  // what the call's return type is.
+  if (!Ret || Ret->getNumOperands() == 0) return true;
+
+  // If the return value is undef, it doesn't matter what the call's
+  // return type is.
+  if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore noalias because it doesn't affect the call sequence.
+  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+    return false;
+
+  // Otherwise, make sure the unmodified return value of I is the return value.
+  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+       U = dyn_cast<Instruction>(U->getOperand(0))) {
+    if (!U)
+      return false;
+    if (!U->hasOneUse())
+      return false;
+    if (U == I)
+      break;
+    // Check for a truly no-op truncate.
+    if (isa<TruncInst>(U) &&
+        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+      continue;
+    // Check for a truly no-op bitcast.
+    if (isa<BitCastInst>(U) &&
+        (U->getOperand(0)->getType() == U->getType() ||
+         (U->getOperand(0)->getType()->isPointerTy() &&
+          U->getType()->isPointerTy())))
+      continue;
+    // Otherwise it's not a true no-op.
+    return false;
+  }
+
+  return true;
+}
+
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 9fa2ecd..d232e55 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -2,6 +2,7 @@
 
 codegen_SRC_FILES :=	\
 	AggressiveAntiDepBreaker.cpp	\
+	Analysis.cpp	\
 	BranchFolding.cpp	\
 	CalcSpillWeights.cpp	\
 	CodePlacementOpt.cpp	\
@@ -29,6 +30,7 @@
 	MachineFunction.cpp	\
 	MachineFunctionAnalysis.cpp	\
 	MachineFunctionPass.cpp	\
+	MachineFunctionPrinterPass.cpp	\
 	MachineInstr.cpp	\
 	MachineLICM.cpp	\
 	MachineLoopInfo.cpp	\
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index 3ee30c6..086b757 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -39,9 +39,9 @@
   /// basic-block region and break them by renaming registers. Return
   /// the number of anti-dependencies broken.
   ///
-  virtual unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
-                                MachineBasicBlock::iterator& Begin,
-                                MachineBasicBlock::iterator& End,
+  virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                MachineBasicBlock::iterator Begin,
+                                MachineBasicBlock::iterator End,
                                 unsigned InsertPosIndex) =0;
   
   /// Observe - Update liveness information to account for the current
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index 62601f0..dcad0a9 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -5,9 +5,6 @@
 	DIE.cpp	\
 	DwarfDebug.cpp	\
 	DwarfException.cpp	\
-	DwarfLabel.cpp	\
-	DwarfPrinter.cpp	\
-	DwarfWriter.cpp	\
 	OcamlGCPrinter.cpp
 
 # For the host
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index bbeb026..e907f07 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -13,11 +13,9 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Constants.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
 #include "llvm/Module.h"
-#include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -27,53 +25,67 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
-#include <cerrno>
+#include "llvm/Support/Timer.h"
 #include <ctype.h>
 using namespace llvm;
 
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
+
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
 char AsmPrinter::ID = 0;
-AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm,
-                       MCContext &Ctx, MCStreamer &Streamer,
-                       const MCAsmInfo *T)
-  : MachineFunctionPass(&ID), O(o),
-    TM(tm), MAI(T), TRI(tm.getRegisterInfo()),
-    OutContext(Ctx), OutStreamer(Streamer),
-    LastMI(0), LastFn(0), Counter(~0U), PrevDLT(NULL) {
-  DW = 0; MMI = 0;
+
+typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
+static gcp_map_type &getGCMap(void *&P) {
+  if (P == 0)
+    P = new gcp_map_type();
+  return *(gcp_map_type*)P;
+}
+
+
+AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+  : MachineFunctionPass(&ID),
+    TM(tm), MAI(tm.getMCAsmInfo()),
+    OutContext(Streamer.getContext()),
+    OutStreamer(Streamer),
+    LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
+  DD = 0; DE = 0; MMI = 0; LI = 0;
+  GCMetadataPrinters = 0;
   VerboseAsm = Streamer.isVerboseAsm();
 }
 
 AsmPrinter::~AsmPrinter() {
-  for (gcp_iterator I = GCMetadataPrinters.begin(),
-                    E = GCMetadataPrinters.end(); I != E; ++I)
-    delete I->second;
+  assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
+  
+  if (GCMetadataPrinters != 0) {
+    gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+    
+    for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
+      delete I->second;
+    delete &GCMap;
+    GCMetadataPrinters = 0;
+  }
   
   delete &OutStreamer;
-  delete &OutContext;
 }
 
 /// getFunctionNumber - Return a unique ID for the current function.
@@ -82,30 +94,41 @@
   return MF->getFunctionNumber();
 }
 
-TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
   return TM.getTargetLowering()->getObjFileLowering();
 }
 
+
+/// getTargetData - Return information about data layout.
+const TargetData &AsmPrinter::getTargetData() const {
+  return *TM.getTargetData();
+}
+
 /// getCurrentSection() - Return the current section we are emitting to.
 const MCSection *AsmPrinter::getCurrentSection() const {
   return OutStreamer.getCurrentSection();
 }
 
 
+
 void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   MachineFunctionPass::getAnalysisUsage(AU);
+  AU.addRequired<MachineModuleInfo>();
   AU.addRequired<GCModuleInfo>();
-  if (VerboseAsm)
+  if (isVerbose())
     AU.addRequired<MachineLoopInfo>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
+  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  MMI->AnalyzeModule(M);
+
   // Initialize TargetLoweringObjectFile.
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
     .Initialize(OutContext, TM);
   
-  Mang = new Mangler(*MAI);
+  Mang = new Mangler(OutContext, *TM.getTargetData());
   
   // Allow the target to emit any magic that it wants at the start of the file.
   EmitStartOfAsmFile(M);
@@ -121,20 +144,22 @@
   assert(MI && "AsmPrinter didn't require GCModuleInfo?");
   for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
     if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
-      MP->beginAssembly(O, *this, *MAI);
-  
-  if (!M.getModuleInlineAsm().empty())
-    O << MAI->getCommentString() << " Start of file scope inline assembly\n"
-      << M.getModuleInlineAsm()
-      << '\n' << MAI->getCommentString()
-      << " End of file scope inline assembly\n";
+      MP->beginAssembly(*this);
 
-  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-  if (MMI)
-    MMI->AnalyzeModule(M);
-  DW = getAnalysisIfAvailable<DwarfWriter>();
-  if (DW)
-    DW->BeginModule(&M, MMI, O, this, MAI);
+  // Emit module-level inline asm if it exists.
+  if (!M.getModuleInlineAsm().empty()) {
+    OutStreamer.AddComment("Start of file scope inline assembly");
+    OutStreamer.AddBlankLine();
+    EmitInlineAsm(M.getModuleInlineAsm(), 0/*no loc cookie*/);
+    OutStreamer.AddComment("End of file scope inline assembly");
+    OutStreamer.AddBlankLine();
+  }
+
+  if (MAI->doesSupportDebugInformation())
+    DD = new DwarfDebug(this, &M);
+    
+  if (MAI->doesSupportExceptionHandling())
+    DE = new DwarfException(this);
 
   return false;
 }
@@ -161,7 +186,7 @@
       // .linkonce discard
       // FIXME: It would be nice to use .linkonce samesize for non-common
       // globals.
-      O << LinkOnce;
+      OutStreamer.EmitRawText(StringRef(LinkOnce));
     } else {
       // .weak _foo
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
@@ -194,7 +219,7 @@
   if (EmitSpecialLLVMGlobal(GV))
     return;
 
-  MCSymbol *GVSym = GetGlobalValueSymbol(GV);
+  MCSymbol *GVSym = Mang->getSymbol(GV);
   EmitVisibility(GVSym, GV->getVisibility());
 
   if (MAI->hasDotTypeDotSizeDirective())
@@ -204,13 +229,21 @@
 
   const TargetData *TD = TM.getTargetData();
   unsigned Size = TD->getTypeAllocSize(GV->getType()->getElementType());
-  unsigned AlignLog = TD->getPreferredAlignmentLog(GV);
+  
+  // If the alignment is specified, we *must* obey it.  Overaligning a global
+  // with a specified alignment is a prompt way to break globals emitted to
+  // sections and expected to be contiguous (e.g. ObjC metadata).
+  unsigned AlignLog;
+  if (unsigned GVAlign = GV->getAlignment())
+    AlignLog = Log2_32(GVAlign);
+  else
+    AlignLog = TD->getPreferredAlignmentLog(GV);
   
   // Handle common and BSS local symbols (.lcomm).
   if (GVKind.isCommon() || GVKind.isBSSLocal()) {
     if (Size == 0) Size = 1;   // .comm Foo, 0 is undefined, avoid it.
     
-    if (VerboseAsm) {
+    if (isVerbose()) {
       WriteAsOperand(OutStreamer.GetCommentOS(), GV,
                      /*PrintType=*/false, GV->getParent());
       OutStreamer.GetCommentOS() << '\n';
@@ -263,7 +296,7 @@
   EmitLinkage(GV->getLinkage(), GVSym);
   EmitAlignment(AlignLog, GV);
 
-  if (VerboseAsm) {
+  if (isVerbose()) {
     WriteAsOperand(OutStreamer.GetCommentOS(), GV,
                    /*PrintType=*/false, GV->getParent());
     OutStreamer.GetCommentOS() << '\n';
@@ -297,7 +330,7 @@
   if (MAI->hasDotTypeDotSizeDirective())
     OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
 
-  if (VerboseAsm) {
+  if (isVerbose()) {
     WriteAsOperand(OutStreamer.GetCommentOS(), F,
                    /*PrintType=*/false, F->getParent());
     OutStreamer.GetCommentOS() << '\n';
@@ -307,15 +340,43 @@
   // do their wild and crazy things as required.
   EmitFunctionEntryLabel();
   
+  // If the function had address-taken blocks that got deleted, then we have
+  // references to the dangling symbols.  Emit them at the start of the function
+  // so that we don't get references to undefined symbols.
+  std::vector<MCSymbol*> DeadBlockSyms;
+  MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+  for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+    OutStreamer.AddComment("Address taken block that was later removed");
+    OutStreamer.EmitLabel(DeadBlockSyms[i]);
+  }
+  
   // Add some workaround for linkonce linkage on Cygwin\MinGW.
   if (MAI->getLinkOnceDirective() != 0 &&
-      (F->hasLinkOnceLinkage() || F->hasWeakLinkage()))
+      (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
     // FIXME: What is this?
-    O << "Lllvm$workaround$fake$stub$" << *CurrentFnSym << ":\n";
+    MCSymbol *FakeStub = 
+      OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
+                                   CurrentFnSym->getName());
+    OutStreamer.EmitLabel(FakeStub);
+  }
   
   // Emit pre-function debug and/or EH information.
-  if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
-    DW->BeginFunction(MF);
+  if (DE) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(EHTimerName, DWARFGroupName);
+      DE->BeginFunction(MF);
+    } else {
+      DE->BeginFunction(MF);
+    }
+  }
+  if (DD) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+      DD->beginFunction(MF);
+    } else {
+      DD->beginFunction(MF);
+    }
+  }
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -330,19 +391,17 @@
   const MachineFunction *MF = MI.getParent()->getParent();
   const TargetMachine &TM = MF->getTarget();
   
-  if (!MI.getDebugLoc().isUnknown()) {
-    DILocation DLT = MF->getDILocation(MI.getDebugLoc());
-    
-    // Print source line info.
-    DIScope Scope = DLT.getScope();
+  DebugLoc DL = MI.getDebugLoc();
+  if (!DL.isUnknown()) {          // Print source line info.
+    DIScope Scope(DL.getScope(MF->getFunction()->getContext()));
     // Omit the directory, because it's likely to be long and uninteresting.
-    if (!Scope.isNull())
+    if (Scope.Verify())
       CommentOS << Scope.getFilename();
     else
       CommentOS << "<unknown>";
-    CommentOS << ':' << DLT.getLineNumber();
-    if (DLT.getColumnNumber() != 0)
-      CommentOS << ':' << DLT.getColumnNumber();
+    CommentOS << ':' << DL.getLine();
+    if (DL.getCol() != 0)
+      CommentOS << ':' << DL.getCol();
     CommentOS << '\n';
   }
   
@@ -381,7 +440,78 @@
   }
 }
 
+/// EmitImplicitDef - This method emits the specified machine instruction
+/// that is an implicit def.
+static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+  unsigned RegNo = MI->getOperand(0).getReg();
+  AP.OutStreamer.AddComment(Twine("implicit-def: ") +
+                            AP.TM.getRegisterInfo()->getName(RegNo));
+  AP.OutStreamer.AddBlankLine();
+}
 
+static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
+  std::string Str = "kill:";
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &Op = MI->getOperand(i);
+    assert(Op.isReg() && "KILL instruction must have only register operands");
+    Str += ' ';
+    Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+    Str += (Op.isDef() ? "<def>" : "<kill>");
+  }
+  AP.OutStreamer.AddComment(Str);
+  AP.OutStreamer.AddBlankLine();
+}
+
+/// EmitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so.  A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+  // This code handles only the 3-operand target-independent form.
+  if (MI->getNumOperands() != 3)
+    return false;
+
+  SmallString<128> Str;
+  raw_svector_ostream OS(Str);
+  OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+
+  // cast away const; DIetc do not take const operands for some reason.
+  DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
+  OS << V.getName() << " <- ";
+
+  // Register or immediate value. Register 0 means undef.
+  if (MI->getOperand(0).isFPImm()) {
+    APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
+    if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+      OS << (double)APF.convertToFloat();
+    } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+      OS << APF.convertToDouble();
+    } else {
+      // There is no good way to print long double.  Convert a copy to
+      // double.  Ah well, it's only a comment.
+      bool ignored;
+      APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+                  &ignored);
+      OS << "(long double) " << APF.convertToDouble();
+    }
+  } else if (MI->getOperand(0).isImm()) {
+    OS << MI->getOperand(0).getImm();
+  } else {
+    assert(MI->getOperand(0).isReg() && "Unknown operand type");
+    if (MI->getOperand(0).getReg() == 0) {
+      // Suppress offset, it is not meaningful here.
+      OS << "undef";
+      // NOTE: Want this comment at start of line, don't emit with AddComment.
+      AP.OutStreamer.EmitRawText(OS.str());
+      return true;
+    }
+    OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+  }
+  
+  OS << '+' << MI->getOperand(1).getImm();
+  // NOTE: Want this comment at start of line, don't emit with AddComment.
+  AP.OutStreamer.EmitRawText(OS.str());
+  return true;
+}
 
 /// EmitFunctionBody - This method emits the body and trailer for a
 /// function.
@@ -389,6 +519,8 @@
   // Emit target-specific gunk before the function body.
   EmitFunctionBodyStart();
   
+  bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
+  
   // Print out code for the function.
   bool HasAnyRealCode = false;
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
@@ -403,52 +535,104 @@
       
       ++EmittedInsts;
       
-      // FIXME: Clean up processDebugLoc.
-      processDebugLoc(II, true);
+      if (ShouldPrintDebugScopes) {
+	if (TimePassesIsEnabled) {
+	  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+	  DD->beginScope(II);
+	} else {
+	  DD->beginScope(II);
+	}
+      }
       
-      if (VerboseAsm)
+      if (isVerbose())
         EmitComments(*II, OutStreamer.GetCommentOS());
 
       switch (II->getOpcode()) {
       case TargetOpcode::DBG_LABEL:
       case TargetOpcode::EH_LABEL:
       case TargetOpcode::GC_LABEL:
-        printLabelInst(II);
+        OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
         break;
       case TargetOpcode::INLINEASM:
-        printInlineAsm(II);
+        EmitInlineAsm(II);
+        break;
+      case TargetOpcode::DBG_VALUE:
+        if (isVerbose()) {
+          if (!EmitDebugValueComment(II, *this))
+            EmitInstruction(II);
+        }
         break;
       case TargetOpcode::IMPLICIT_DEF:
-        printImplicitDef(II);
+        if (isVerbose()) EmitImplicitDef(II, *this);
         break;
       case TargetOpcode::KILL:
-        printKill(II);
+        if (isVerbose()) EmitKill(II, *this);
         break;
       default:
         EmitInstruction(II);
         break;
       }
       
-      // FIXME: Clean up processDebugLoc.
-      processDebugLoc(II, false);
+      if (ShouldPrintDebugScopes) {
+	if (TimePassesIsEnabled) {
+	  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+	  DD->endScope(II);
+	} else {
+	  DD->endScope(II);
+	}
+      }
     }
   }
   
   // If the function is empty and the object file uses .subsections_via_symbols,
   // then we need to emit *something* to the function body to prevent the
-  // labels from collapsing together.  Just emit a 0 byte.
-  if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)
-    OutStreamer.EmitIntValue(0, 1, 0/*addrspace*/);
+  // labels from collapsing together.  Just emit a noop.
+  if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) {
+    MCInst Noop;
+    TM.getInstrInfo()->getNoopForMachoTarget(Noop);
+    if (Noop.getOpcode()) {
+      OutStreamer.AddComment("avoids zero-length function");
+      OutStreamer.EmitInstruction(Noop);
+    } else  // Target not mc-ized yet.
+      OutStreamer.EmitRawText(StringRef("\tnop\n"));
+  }
   
   // Emit target-specific gunk after the function body.
   EmitFunctionBodyEnd();
   
-  if (MAI->hasDotTypeDotSizeDirective())
-    O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n';
+  // If the target wants a .size directive for the size of the function, emit
+  // it.
+  if (MAI->hasDotTypeDotSizeDirective()) {
+    // Create a symbol for the end of function, so we can get the size as
+    // difference between the function label and the temp label.
+    MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
+    OutStreamer.EmitLabel(FnEndLabel);
+    
+    const MCExpr *SizeExp =
+      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+                              MCSymbolRefExpr::Create(CurrentFnSym, OutContext),
+                              OutContext);
+    OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
+  }
   
   // Emit post-function debug information.
-  if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
-    DW->EndFunction(MF);
+  if (DD) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+      DD->endFunction(MF);
+    } else {
+      DD->endFunction(MF);
+    }
+  }
+  if (DE) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(EHTimerName, DWARFGroupName);
+      DE->EndFunction();
+    } else {
+      DE->EndFunction();
+    }
+  }
+  MMI->EndFunction();
   
   // Print out jump tables referenced by the function.
   EmitJumpTableInfo();
@@ -463,9 +647,25 @@
        I != E; ++I)
     EmitGlobalVariable(I);
   
-  // Emit final debug information.
-  if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
-    DW->EndModule();
+  // Finalize debug and EH information.
+  if (DE) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(EHTimerName, DWARFGroupName);
+      DE->EndModule();
+    } else {
+      DE->EndModule();
+    }
+    delete DE; DE = 0;
+  }
+  if (DD) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+      DD->endModule();
+    } else {
+      DD->endModule();
+    }
+    delete DD; DD = 0;
+  }
   
   // If the target wants to know about weak references, print them all.
   if (MAI->getWeakRefDirective()) {
@@ -478,14 +678,12 @@
     for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
          I != E; ++I) {
       if (!I->hasExternalWeakLinkage()) continue;
-      OutStreamer.EmitSymbolAttribute(GetGlobalValueSymbol(I),
-                                      MCSA_WeakReference);
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
     }
     
     for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
       if (!I->hasExternalWeakLinkage()) continue;
-      OutStreamer.EmitSymbolAttribute(GetGlobalValueSymbol(I),
-                                      MCSA_WeakReference);
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
     }
   }
 
@@ -493,10 +691,10 @@
     OutStreamer.AddBlankLine();
     for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
          I != E; ++I) {
-      MCSymbol *Name = GetGlobalValueSymbol(I);
+      MCSymbol *Name = Mang->getSymbol(I);
 
       const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
-      MCSymbol *Target = GetGlobalValueSymbol(GV);
+      MCSymbol *Target = Mang->getSymbol(GV);
 
       if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
         OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
@@ -517,13 +715,13 @@
   assert(MI && "AsmPrinter didn't require GCModuleInfo?");
   for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
     if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
-      MP->finishAssembly(O, *this, *MAI);
+      MP->finishAssembly(*this);
 
   // If we don't have any trampolines, then we don't require stack memory
   // to be executable. Some targets have a directive to declare this.
   Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
   if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
-    if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+    if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
       OutStreamer.SwitchSection(S);
   
   // Allow the target to emit any magic that it wants at the end of the file,
@@ -531,7 +729,7 @@
   EmitEndOfAsmFile(M);
   
   delete Mang; Mang = 0;
-  DW = 0; MMI = 0;
+  MMI = 0;
   
   OutStreamer.Finish();
   return false;
@@ -540,9 +738,9 @@
 void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   this->MF = &MF;
   // Get the function symbol.
-  CurrentFnSym = GetGlobalValueSymbol(MF.getFunction());
+  CurrentFnSym = Mang->getSymbol(MF.getFunction());
 
-  if (VerboseAsm)
+  if (isVerbose())
     LI = &getAnalysis<MachineLoopInfo>();
 }
 
@@ -630,7 +828,7 @@
       Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
 
       // Emit the label with a comment on it.
-      if (VerboseAsm) {
+      if (isVerbose()) {
         OutStreamer.GetCommentOS() << "constant pool ";
         WriteTypeSymbolic(OutStreamer.GetCommentOS(), CPE.getType(),
                           MF->getFunction()->getParent());
@@ -652,6 +850,7 @@
 void AsmPrinter::EmitJumpTableInfo() {
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
   if (MJTI == 0) return;
+  if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   if (JT.empty()) return;
 
@@ -699,7 +898,7 @@
         
         // .set LJTSet, LBB32-base
         const MCExpr *LHS =
-          MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+          MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
         OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
                                 MCBinaryExpr::CreateSub(LHS, Base, OutContext));
       }
@@ -728,6 +927,8 @@
                                     unsigned UID) const {
   const MCExpr *Value = 0;
   switch (MJTI->getEntryKind()) {
+  case MachineJumpTableInfo::EK_Inline:
+    llvm_unreachable("Cannot emit EK_Inline jump table entry"); break;
   case MachineJumpTableInfo::EK_Custom32:
     Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
                                                               OutContext);
@@ -735,13 +936,13 @@
   case MachineJumpTableInfo::EK_BlockAddress:
     // EK_BlockAddress - Each entry is a plain address of block, e.g.:
     //     .word LBB123
-    Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+    Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
     break;
   case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
     // EK_GPRel32BlockAddress - Each entry is an address of block, encoded
     // with a relocation as gp-relative, e.g.:
     //     .gprel32 LBB123
-    MCSymbol *MBBSym = MBB->getSymbol(OutContext);
+    MCSymbol *MBBSym = MBB->getSymbol();
     OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
     return;
   }
@@ -765,7 +966,7 @@
       break;
     }
     // Otherwise, use the difference as the jump table entry.
-    Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+    Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
     const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
     Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
     break;
@@ -843,8 +1044,7 @@
     const GlobalValue *GV =
       dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
     if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
-      OutStreamer.EmitSymbolAttribute(GetGlobalValueSymbol(GV),
-                                      MCSA_NoDeadStrip);
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
   }
 }
 
@@ -888,12 +1088,59 @@
   OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/);
 }
 
-/// EmitInt64 - Emit a long long directive and value.
-///
-void AsmPrinter::EmitInt64(uint64_t Value) const {
-  OutStreamer.EmitIntValue(Value, 8, 0/*addrspace*/);
+/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
+/// in bytes of the directive is specified by Size and Hi/Lo specify the
+/// labels.  This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+                                     unsigned Size) const {
+  // Get the Hi-Lo expression.
+  const MCExpr *Diff = 
+    MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
+                            MCSymbolRefExpr::Create(Lo, OutContext),
+                            OutContext);
+  
+  if (!MAI->hasSetDirective()) {
+    OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
+    return;
+  }
+
+  // Otherwise, emit with .set (aka assignment).
+  MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+  OutStreamer.EmitAssignment(SetLabel, Diff);
+  OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
 }
 
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" 
+/// where the size in bytes of the directive is specified by Size and Hi/Lo
+/// specify the labels.  This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+                                           const MCSymbol *Lo, unsigned Size) 
+  const {
+  
+  // Emit Hi+Offset - Lo
+  // Get the Hi+Offset expression.
+  const MCExpr *Plus =
+    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), 
+                            MCConstantExpr::Create(Offset, OutContext),
+                            OutContext);
+  
+  // Get the Hi+Offset-Lo expression.
+  const MCExpr *Diff = 
+    MCBinaryExpr::CreateSub(Plus,
+                            MCSymbolRefExpr::Create(Lo, OutContext),
+                            OutContext);
+  
+  if (!MAI->hasSetDirective()) 
+    OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/);
+  else {
+    // Otherwise, emit with .set (aka assignment).
+    MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+    OutStreamer.EmitAssignment(SetLabel, Diff);
+    OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
+  }
+}
+    
+
 //===----------------------------------------------------------------------===//
 
 // EmitAlignment - Emit an alignment directive to the specified power of
@@ -924,6 +1171,10 @@
     OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
 }
 
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
 ///
 static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
@@ -936,7 +1187,7 @@
     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
   
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
-    return MCSymbolRefExpr::Create(AP.GetGlobalValueSymbol(GV), Ctx);
+    return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
   
@@ -1051,12 +1302,15 @@
   }
 }
 
+static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+                                   AsmPrinter &AP);
+
 static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
                                     AsmPrinter &AP) {
   if (AddrSpace != 0 || !CA->isString()) {
     // Not a string.  Print the values in successive locations
     for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
-      AP.EmitGlobalConstant(CA->getOperand(i), AddrSpace);
+      EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
     return;
   }
   
@@ -1072,7 +1326,7 @@
 static void EmitGlobalConstantVector(const ConstantVector *CV,
                                      unsigned AddrSpace, AsmPrinter &AP) {
   for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
-    AP.EmitGlobalConstant(CV->getOperand(i), AddrSpace);
+    EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
 }
 
 static void EmitGlobalConstantStruct(const ConstantStruct *CS,
@@ -1092,7 +1346,7 @@
     SizeSoFar += FieldSize + PadSize;
 
     // Now print the actual field value.
-    AP.EmitGlobalConstant(Field, AddrSpace);
+    EmitGlobalConstantImpl(Field, AddrSpace, AP);
 
     // Insert padding - this may include padding to increase the size of the
     // current field up to the ABI size (if the struct is not packed) as well
@@ -1103,12 +1357,27 @@
          "Layout of constant struct may be incorrect!");
 }
 
+static void EmitGlobalConstantUnion(const ConstantUnion *CU, 
+                                    unsigned AddrSpace, AsmPrinter &AP) {
+  const TargetData *TD = AP.TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CU->getType());
+
+  const Constant *Contents = CU->getOperand(0);
+  unsigned FilledSize = TD->getTypeAllocSize(Contents->getType());
+    
+  // Print the actually filled part
+  EmitGlobalConstantImpl(Contents, AddrSpace, AP);
+
+  // And pad with enough zeroes
+  AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace);
+}
+
 static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
                                  AsmPrinter &AP) {
   // FP Constants are printed as integer constants to avoid losing
   // precision.
   if (CFP->getType()->isDoubleTy()) {
-    if (AP.VerboseAsm) {
+    if (AP.isVerbose()) {
       double Val = CFP->getValueAPF().convertToDouble();
       AP.OutStreamer.GetCommentOS() << "double " << Val << '\n';
     }
@@ -1119,7 +1388,7 @@
   }
   
   if (CFP->getType()->isFloatTy()) {
-    if (AP.VerboseAsm) {
+    if (AP.isVerbose()) {
       float Val = CFP->getValueAPF().convertToFloat();
       AP.OutStreamer.GetCommentOS() << "float " << Val << '\n';
     }
@@ -1130,10 +1399,10 @@
   
   if (CFP->getType()->isX86_FP80Ty()) {
     // all long double variants are printed as hex
-    // api needed to prevent premature destruction
+    // API needed to prevent premature destruction
     APInt API = CFP->getValueAPF().bitcastToAPInt();
     const uint64_t *p = API.getRawData();
-    if (AP.VerboseAsm) {
+    if (AP.isVerbose()) {
       // Convert to double so we can print the approximate val as a comment.
       APFloat DoubleVal = CFP->getValueAPF();
       bool ignored;
@@ -1160,8 +1429,8 @@
   
   assert(CFP->getType()->isPPC_FP128Ty() &&
          "Floating point constant type not handled");
-  // All long double variants are printed as hex api needed to prevent
-  // premature destruction.
+  // All long double variants are printed as hex
+  // API needed to prevent premature destruction.
   APInt API = CFP->getValueAPF().bitcastToAPInt();
   const uint64_t *p = API.getRawData();
   if (AP.TM.getTargetData()->isBigEndian()) {
@@ -1189,54 +1458,68 @@
   }
 }
 
-/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
-void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+                                   AsmPrinter &AP) {
   if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
-    uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
-    if (Size == 0) Size = 1; // An empty "_foo:" followed by a section is undef.
-    return OutStreamer.EmitZeros(Size, AddrSpace);
+    uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+    return AP.OutStreamer.EmitZeros(Size, AddrSpace);
   }
 
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
-    unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+    unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
     switch (Size) {
     case 1:
     case 2:
     case 4:
     case 8:
-      if (VerboseAsm)
-        OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
-      OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+      if (AP.isVerbose())
+        AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+        AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
       return;
     default:
-      EmitGlobalConstantLargeInt(CI, AddrSpace, *this);
+      EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
       return;
     }
   }
   
   if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
-    return EmitGlobalConstantArray(CVA, AddrSpace, *this);
+    return EmitGlobalConstantArray(CVA, AddrSpace, AP);
   
   if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
-    return EmitGlobalConstantStruct(CVS, AddrSpace, *this);
+    return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
 
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
-    return EmitGlobalConstantFP(CFP, AddrSpace, *this);
-  
-  if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
-    return EmitGlobalConstantVector(V, AddrSpace, *this);
+    return EmitGlobalConstantFP(CFP, AddrSpace, AP);
 
   if (isa<ConstantPointerNull>(CV)) {
-    unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
-    OutStreamer.EmitIntValue(0, Size, AddrSpace);
+    unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+    AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
     return;
   }
   
+  if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV))
+    return EmitGlobalConstantUnion(CVU, AddrSpace, AP);
+  
+  if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+    return EmitGlobalConstantVector(V, AddrSpace, AP);
+  
   // Otherwise, it must be a ConstantExpr.  Lower it to an MCExpr, then emit it
   // thread the streamer with EmitValue.
-  OutStreamer.EmitValue(LowerConstant(CV, *this),
-                        TM.getTargetData()->getTypeAllocSize(CV->getType()),
-                        AddrSpace);
+  AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
+                         AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
+                           AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+  uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+  if (Size)
+    EmitGlobalConstantImpl(CV, AddrSpace, *this);
+  else if (MAI->hasSubsectionsViaSymbols()) {
+    // If the global has zero size, emit a single byte so that two labels don't
+    // look like they are at the same location.
+    OutStreamer.EmitIntValue(0, 1, AddrSpace);
+  }
 }
 
 void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
@@ -1244,347 +1527,45 @@
   llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
 }
 
-/// PrintSpecial - Print information related to the specified machine instr
-/// that is independent of the operand, and may be independent of the instr
-/// itself.  This can be useful for portably encoding the comment character
-/// or other bits of target-specific knowledge into the asmstrings.  The
-/// syntax used is ${:comment}.  Targets can override this to add support
-/// for their own strange codes.
-void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
-  if (!strcmp(Code, "private")) {
-    O << MAI->getPrivateGlobalPrefix();
-  } else if (!strcmp(Code, "comment")) {
-    if (VerboseAsm)
-      O << MAI->getCommentString();
-  } else if (!strcmp(Code, "uid")) {
-    // Comparing the address of MI isn't sufficient, because machineinstrs may
-    // be allocated to the same address across functions.
-    const Function *ThisF = MI->getParent()->getParent()->getFunction();
-    
-    // If this is a new LastFn instruction, bump the counter.
-    if (LastMI != MI || LastFn != ThisF) {
-      ++Counter;
-      LastMI = MI;
-      LastFn = ThisF;
-    }
-    O << Counter;
-  } else {
-    std::string msg;
-    raw_string_ostream Msg(msg);
-    Msg << "Unknown special formatter '" << Code
-         << "' for machine instr: " << *MI;
-    llvm_report_error(Msg.str());
-  }    
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+  if (Offset > 0)
+    OS << '+' << Offset;
+  else if (Offset < 0)
+    OS << Offset;
 }
 
-/// processDebugLoc - Processes the debug information of each machine
-/// instruction's DebugLoc.
-void AsmPrinter::processDebugLoc(const MachineInstr *MI, 
-                                 bool BeforePrintingInsn) {
-  if (!MAI || !DW || !MAI->doesSupportDebugInformation()
-      || !DW->ShouldEmitDwarfDebug())
-    return;
-  DebugLoc DL = MI->getDebugLoc();
-  if (DL.isUnknown())
-    return;
-  DILocation CurDLT = MF->getDILocation(DL);
-  if (CurDLT.getScope().isNull())
-    return;
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
 
-  if (!BeforePrintingInsn) {
-    // After printing instruction
-    DW->EndScope(MI);
-  } else if (CurDLT.getNode() != PrevDLT) {
-    unsigned L = DW->RecordSourceLine(CurDLT.getLineNumber(), 
-                                      CurDLT.getColumnNumber(),
-                                      CurDLT.getScope().getNode());
-    printLabel(L);
-    O << '\n';
-    DW->BeginScope(MI, L);
-    PrevDLT = CurDLT.getNode();
-  }
+/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+/// temporary label with the specified stem and unique ID.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
+  return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+                                      Name + Twine(ID));
 }
 
-
-/// printInlineAsm - This method formats and prints the specified machine
-/// instruction that is an inline asm.
-void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
-  unsigned NumOperands = MI->getNumOperands();
-  
-  // Count the number of register definitions.
-  unsigned NumDefs = 0;
-  for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
-       ++NumDefs)
-    assert(NumDefs != NumOperands-1 && "No asm string?");
-  
-  assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
-
-  // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
-  const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
-
-  O << '\t';
-
-  // If this asmstr is empty, just print the #APP/#NOAPP markers.
-  // These are useful to see where empty asm's wound up.
-  if (AsmStr[0] == 0) {
-    O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t";
-    O << MAI->getCommentString() << MAI->getInlineAsmEnd() << '\n';
-    return;
-  }
-  
-  O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t";
-
-  // The variant of the current asmprinter.
-  int AsmPrinterVariant = MAI->getAssemblerDialect();
-
-  int CurVariant = -1;            // The number of the {.|.|.} region we are in.
-  const char *LastEmitted = AsmStr; // One past the last character emitted.
-  
-  while (*LastEmitted) {
-    switch (*LastEmitted) {
-    default: {
-      // Not a special case, emit the string section literally.
-      const char *LiteralEnd = LastEmitted+1;
-      while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
-             *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
-        ++LiteralEnd;
-      if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
-        O.write(LastEmitted, LiteralEnd-LastEmitted);
-      LastEmitted = LiteralEnd;
-      break;
-    }
-    case '\n':
-      ++LastEmitted;   // Consume newline character.
-      O << '\n';       // Indent code with newline.
-      break;
-    case '$': {
-      ++LastEmitted;   // Consume '$' character.
-      bool Done = true;
-
-      // Handle escapes.
-      switch (*LastEmitted) {
-      default: Done = false; break;
-      case '$':     // $$ -> $
-        if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
-          O << '$';
-        ++LastEmitted;  // Consume second '$' character.
-        break;
-      case '(':             // $( -> same as GCC's { character.
-        ++LastEmitted;      // Consume '(' character.
-        if (CurVariant != -1) {
-          llvm_report_error("Nested variants found in inline asm string: '"
-                            + std::string(AsmStr) + "'");
-        }
-        CurVariant = 0;     // We're in the first variant now.
-        break;
-      case '|':
-        ++LastEmitted;  // consume '|' character.
-        if (CurVariant == -1)
-          O << '|';       // this is gcc's behavior for | outside a variant
-        else
-          ++CurVariant;   // We're in the next variant.
-        break;
-      case ')':         // $) -> same as GCC's } char.
-        ++LastEmitted;  // consume ')' character.
-        if (CurVariant == -1)
-          O << '}';     // this is gcc's behavior for } outside a variant
-        else 
-          CurVariant = -1;
-        break;
-      }
-      if (Done) break;
-      
-      bool HasCurlyBraces = false;
-      if (*LastEmitted == '{') {     // ${variable}
-        ++LastEmitted;               // Consume '{' character.
-        HasCurlyBraces = true;
-      }
-      
-      // If we have ${:foo}, then this is not a real operand reference, it is a
-      // "magic" string reference, just like in .td files.  Arrange to call
-      // PrintSpecial.
-      if (HasCurlyBraces && *LastEmitted == ':') {
-        ++LastEmitted;
-        const char *StrStart = LastEmitted;
-        const char *StrEnd = strchr(StrStart, '}');
-        if (StrEnd == 0) {
-          llvm_report_error("Unterminated ${:foo} operand in inline asm string: '" 
-                            + std::string(AsmStr) + "'");
-        }
-        
-        std::string Val(StrStart, StrEnd);
-        PrintSpecial(MI, Val.c_str());
-        LastEmitted = StrEnd+1;
-        break;
-      }
-            
-      const char *IDStart = LastEmitted;
-      char *IDEnd;
-      errno = 0;
-      long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs.
-      if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) {
-        llvm_report_error("Bad $ operand number in inline asm string: '" 
-                          + std::string(AsmStr) + "'");
-      }
-      LastEmitted = IDEnd;
-      
-      char Modifier[2] = { 0, 0 };
-      
-      if (HasCurlyBraces) {
-        // If we have curly braces, check for a modifier character.  This
-        // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
-        if (*LastEmitted == ':') {
-          ++LastEmitted;    // Consume ':' character.
-          if (*LastEmitted == 0) {
-            llvm_report_error("Bad ${:} expression in inline asm string: '" 
-                              + std::string(AsmStr) + "'");
-          }
-          
-          Modifier[0] = *LastEmitted;
-          ++LastEmitted;    // Consume modifier character.
-        }
-        
-        if (*LastEmitted != '}') {
-          llvm_report_error("Bad ${} expression in inline asm string: '" 
-                            + std::string(AsmStr) + "'");
-        }
-        ++LastEmitted;    // Consume '}' character.
-      }
-      
-      if ((unsigned)Val >= NumOperands-1) {
-        llvm_report_error("Invalid $ operand number in inline asm string: '" 
-                          + std::string(AsmStr) + "'");
-      }
-      
-      // Okay, we finally have a value number.  Ask the target to print this
-      // operand!
-      if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
-        unsigned OpNo = 1;
-
-        bool Error = false;
-
-        // Scan to find the machine operand number for the operand.
-        for (; Val; --Val) {
-          if (OpNo >= MI->getNumOperands()) break;
-          unsigned OpFlags = MI->getOperand(OpNo).getImm();
-          OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
-        }
-
-        if (OpNo >= MI->getNumOperands()) {
-          Error = true;
-        } else {
-          unsigned OpFlags = MI->getOperand(OpNo).getImm();
-          ++OpNo;  // Skip over the ID number.
-
-          if (Modifier[0] == 'l')  // labels are target independent
-            O << *MI->getOperand(OpNo).getMBB()->getSymbol(OutContext);
-          else {
-            AsmPrinter *AP = const_cast<AsmPrinter*>(this);
-            if ((OpFlags & 7) == 4) {
-              Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
-                                                Modifier[0] ? Modifier : 0);
-            } else {
-              Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
-                                          Modifier[0] ? Modifier : 0);
-            }
-          }
-        }
-        if (Error) {
-          std::string msg;
-          raw_string_ostream Msg(msg);
-          Msg << "Invalid operand found in inline asm: '" << AsmStr << "'\n";
-          MI->print(Msg);
-          llvm_report_error(Msg.str());
-        }
-      }
-      break;
-    }
-    }
-  }
-  O << "\n\t" << MAI->getCommentString() << MAI->getInlineAsmEnd();
-  OutStreamer.AddBlankLine();
+/// GetTempSymbol - Return an assembler temporary label with the specified
+/// stem.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const {
+  return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+
+                                      Name);
 }
 
-/// printImplicitDef - This method prints the specified machine instruction
-/// that is an implicit def.
-void AsmPrinter::printImplicitDef(const MachineInstr *MI) const {
-  if (!VerboseAsm) return;
-  O.PadToColumn(MAI->getCommentColumn());
-  O << MAI->getCommentString() << " implicit-def: "
-    << TRI->getName(MI->getOperand(0).getReg());
-  OutStreamer.AddBlankLine();
-}
-
-void AsmPrinter::printKill(const MachineInstr *MI) const {
-  if (!VerboseAsm) return;
-  O.PadToColumn(MAI->getCommentColumn());
-  O << MAI->getCommentString() << " kill:";
-  for (unsigned n = 0, e = MI->getNumOperands(); n != e; ++n) {
-    const MachineOperand &op = MI->getOperand(n);
-    assert(op.isReg() && "KILL instruction must have only register operands");
-    O << ' ' << TRI->getName(op.getReg()) << (op.isDef() ? "<def>" : "<kill>");
-  }
-  OutStreamer.AddBlankLine();
-}
-
-/// printLabel - This method prints a local label used by debug and
-/// exception handling tables.
-void AsmPrinter::printLabelInst(const MachineInstr *MI) const {
-  printLabel(MI->getOperand(0).getImm());
-  OutStreamer.AddBlankLine();
-}
-
-void AsmPrinter::printLabel(unsigned Id) const {
-  O << MAI->getPrivateGlobalPrefix() << "label" << Id << ':';
-}
-
-/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
-/// instruction, using the specified assembler variant.  Targets should
-/// override this to format as appropriate.
-bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                 unsigned AsmVariant, const char *ExtraCode) {
-  // Target doesn't support this yet!
-  return true;
-}
-
-bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                                       unsigned AsmVariant,
-                                       const char *ExtraCode) {
-  // Target doesn't support this yet!
-  return true;
-}
 
 MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
-  return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock());
+  return MMI->getAddrLabelSymbol(BA->getBasicBlock());
 }
 
-MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
-                                            const BasicBlock *BB) const {
-  assert(BB->hasName() &&
-         "Address of anonymous basic block not supported yet!");
-
-  // This code must use the function name itself, and not the function number,
-  // since it must be possible to generate the label name from within other
-  // functions.
-  SmallString<60> FnName;
-  Mang->getNameWithPrefix(FnName, F, false);
-
-  // FIXME: THIS IS BROKEN IF THE LLVM BASIC BLOCK DOESN'T HAVE A NAME!
-  SmallString<60> NameResult;
-  Mang->getNameWithPrefix(NameResult,
-                          StringRef("BA") + Twine((unsigned)FnName.size()) + 
-                          "_" + FnName.str() + "_" + BB->getName(), 
-                          Mangler::Private);
-
-  return OutContext.GetOrCreateSymbol(NameResult.str());
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+  return MMI->getAddrLabelSymbol(BB);
 }
 
 /// GetCPISymbol - Return the symbol for the specified constant pool entry.
 MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
-  SmallString<60> Name;
-  raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "CPI"
-    << getFunctionNumber() << '_' << CPID;
-  return OutContext.GetOrCreateSymbol(Name.str());
+  return OutContext.GetOrCreateSymbol
+    (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+     + "_" + Twine(CPID));
 }
 
 /// GetJTISymbol - Return the symbol for the specified jump table entry.
@@ -1595,18 +1576,9 @@
 /// GetJTSetSymbol - Return the symbol for the specified jump table .set
 /// FIXME: privatize to AsmPrinter.
 MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
-  SmallString<60> Name;
-  raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
-    << getFunctionNumber() << '_' << UID << "_set_" << MBBID;
-  return OutContext.GetOrCreateSymbol(Name.str());
-}
-
-/// GetGlobalValueSymbol - Return the MCSymbol for the specified global
-/// value.
-MCSymbol *AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const {
-  SmallString<60> NameStr;
-  Mang->getNameWithPrefix(NameStr, GV, false);
-  return OutContext.GetOrCreateSymbol(NameStr.str());
+  return OutContext.GetOrCreateSymbol
+  (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
+   Twine(UID) + "_set_" + Twine(MBBID));
 }
 
 /// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
@@ -1657,10 +1629,10 @@
   }
 }
 
-/// PrintBasicBlockLoopComments - Pretty-print comments for basic blocks.
-static void PrintBasicBlockLoopComments(const MachineBasicBlock &MBB,
-                                        const MachineLoopInfo *LI,
-                                        const AsmPrinter &AP) {
+/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+                                       const MachineLoopInfo *LI,
+                                       const AsmPrinter &AP) {
   // Add loop depth information
   const MachineLoop *Loop = LI->getLoopFor(&MBB);
   if (Loop == 0) return;
@@ -1704,39 +1676,43 @@
   if (unsigned Align = MBB->getAlignment())
     EmitAlignment(Log2_32(Align));
 
-  // If the block has its address taken, emit a special label to satisfy
-  // references to the block. This is done so that we don't need to
-  // remember the number of this label, and so that we can make
-  // forward references to labels without knowing what their numbers
-  // will be.
+  // If the block has its address taken, emit any labels that were used to
+  // reference the block.  It is possible that there is more than one label
+  // here, because multiple LLVM BB's may have been RAUW'd to this block after
+  // the references were generated.
   if (MBB->hasAddressTaken()) {
     const BasicBlock *BB = MBB->getBasicBlock();
-    if (VerboseAsm)
-      OutStreamer.AddComment("Address Taken");
-    OutStreamer.EmitLabel(GetBlockAddressSymbol(BB->getParent(), BB));
+    if (isVerbose())
+      OutStreamer.AddComment("Block address taken");
+    
+    std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+    for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+      OutStreamer.EmitLabel(Syms[i]);
   }
 
   // Print the main label for the block.
   if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
-    if (VerboseAsm) {
-      // NOTE: Want this comment at start of line.
-      O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':';
+    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
       if (const BasicBlock *BB = MBB->getBasicBlock())
         if (BB->hasName())
           OutStreamer.AddComment("%" + BB->getName());
       
-      PrintBasicBlockLoopComments(*MBB, LI, *this);
-      OutStreamer.AddBlankLine();
+      EmitBasicBlockLoopComments(*MBB, LI, *this);
+      
+      // NOTE: Want this comment at start of line, don't emit with AddComment.
+      OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
+                              Twine(MBB->getNumber()) + ":");
     }
   } else {
-    if (VerboseAsm) {
+    if (isVerbose()) {
       if (const BasicBlock *BB = MBB->getBasicBlock())
         if (BB->hasName())
           OutStreamer.AddComment("%" + BB->getName());
-      PrintBasicBlockLoopComments(*MBB, LI, *this);
+      EmitBasicBlockLoopComments(*MBB, LI, *this);
     }
 
-    OutStreamer.EmitLabel(MBB->getSymbol(OutContext));
+    OutStreamer.EmitLabel(MBB->getSymbol());
   }
 }
 
@@ -1757,18 +1733,11 @@
     OutStreamer.EmitSymbolAttribute(Sym, Attr);
 }
 
-void AsmPrinter::printOffset(int64_t Offset) const {
-  if (Offset > 0)
-    O << '+' << Offset;
-  else if (Offset < 0)
-    O << Offset;
-}
-
 /// isBlockOnlyReachableByFallthough - Return true if the basic block has
 /// exactly one predecessor and the control transfer mechanism between
 /// the predecessor and this block is a fall-through.
-bool AsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) 
-    const {
+bool AsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
   // If this is a landing pad, it isn't a fall through.  If it has no preds,
   // then nothing falls through to it.
   if (MBB->isLandingPad() || MBB->pred_empty())
@@ -1800,9 +1769,10 @@
 GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
   if (!S->usesMetadata())
     return 0;
-  
-  gcp_iterator GCPI = GCMetadataPrinters.find(S);
-  if (GCPI != GCMetadataPrinters.end())
+
+  gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+  gcp_map_type::iterator GCPI = GCMap.find(S);
+  if (GCPI != GCMap.end())
     return GCPI->second;
   
   const char *Name = S->getName().c_str();
@@ -1813,11 +1783,11 @@
     if (strcmp(Name, I->getName()) == 0) {
       GCMetadataPrinter *GMP = I->instantiate();
       GMP->S = S;
-      GCMetadataPrinters.insert(std::make_pair(S, GMP));
+      GCMap.insert(std::make_pair(S, GMP));
       return GMP;
     }
   
-  llvm_report_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+  report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
   return 0;
 }
 
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
new file mode 100644
index 0000000..b310578
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -0,0 +1,279 @@
+//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dwarf emissions parts of AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
+  if (isVerbose() && Desc)
+    OutStreamer.AddComment(Desc);
+    
+  if (MAI->hasLEB128()) {
+    // FIXME: MCize.
+    OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value));
+    return;
+  }
+
+  // If we don't have .sleb128, emit as .bytes.
+  int Sign = Value >> (8 * sizeof(Value) - 1);
+  bool IsMore;
+  
+  do {
+    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+    Value >>= 7;
+    IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+    if (IsMore) Byte |= 0x80;
+    OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+  } while (IsMore);
+}
+
+/// EmitULEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
+                             unsigned PadTo) const {
+  if (isVerbose() && Desc)
+    OutStreamer.AddComment(Desc);
+ 
+  if (MAI->hasLEB128() && PadTo == 0) {
+    // FIXME: MCize.
+    OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value));
+    return;
+  }
+  
+  // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
+  do {
+    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+    Value >>= 7;
+    if (Value || PadTo != 0) Byte |= 0x80;
+    OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+  } while (Value);
+
+  if (PadTo) {
+    if (PadTo > 1)
+      OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
+    OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
+  }
+}
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void AsmPrinter::EmitCFAByte(unsigned Val) const {
+  if (isVerbose()) {
+    if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+      OutStreamer.AddComment("DW_CFA_offset + Reg (" + 
+                             Twine(Val-dwarf::DW_CFA_offset) + ")");
+    else
+      OutStreamer.AddComment(dwarf::CallFrameString(Val));
+  }
+  OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+  switch (Encoding) {
+  case dwarf::DW_EH_PE_absptr: return "absptr";
+  case dwarf::DW_EH_PE_omit:   return "omit";
+  case dwarf::DW_EH_PE_pcrel:  return "pcrel";
+  case dwarf::DW_EH_PE_udata4: return "udata4";
+  case dwarf::DW_EH_PE_udata8: return "udata8";
+  case dwarf::DW_EH_PE_sdata4: return "sdata4";
+  case dwarf::DW_EH_PE_sdata8: return "sdata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+    return "indirect pcrel udata4";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+    return "indirect pcrel sdata4";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+    return "indirect pcrel udata8";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+    return "indirect pcrel sdata8";
+  }
+  
+  return "<unknown encoding>";
+}
+
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding.  If verbose assembly output is enabled, we output comments
+/// describing the encoding.  Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+  if (isVerbose()) {
+    if (Desc != 0)
+      OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+                             Twine(DecodeDWARFEncoding(Val)));
+    else
+      OutStreamer.AddComment(Twine("Encoding = ") +
+                             DecodeDWARFEncoding(Val));
+  }
+  
+  OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
+  if (Encoding == dwarf::DW_EH_PE_omit)
+    return 0;
+  
+  switch (Encoding & 0x07) {
+  default: assert(0 && "Invalid encoded value.");
+  case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
+  case dwarf::DW_EH_PE_udata2: return 2;
+  case dwarf::DW_EH_PE_udata4: return 4;
+  case dwarf::DW_EH_PE_udata8: return 8;
+  }
+}
+
+void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+  
+  const MCExpr *Exp =
+    TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer);
+  OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+}
+
+void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+  
+  const MCExpr *Exp =
+    TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+  OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+}
+
+/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
+/// section.  This can be done with a special directive if the target supports
+/// it (e.g. cygwin) or by emitting it as an offset from a label at the start
+/// of the section.
+///
+/// SectionLabel is a temporary label emitted at the start of the section that
+/// Label lives in.
+void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
+                                   const MCSymbol *SectionLabel) const {
+  // On COFF targets, we have to emit the special .secrel32 directive.
+  if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) {
+    // FIXME: MCize.
+    OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName()));
+    return;
+  }
+  
+  // Get the section that we're referring to, based on SectionLabel.
+  const MCSection &Section = SectionLabel->getSection();
+  
+  // If Label has already been emitted, verify that it is in the same section as
+  // section label for sanity.
+  assert((!Label->isInSection() || &Label->getSection() == &Section) &&
+         "Section offset using wrong section base for label");
+  
+  // If the section in question will end up with an address of 0 anyway, we can
+  // just emit an absolute reference to save a relocation.
+  if (Section.isBaseAddressKnownZero()) {
+    OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
+    return;
+  }
+  
+  // Otherwise, emit it as a label difference from the start of the section.
+  EmitLabelDifference(Label, SectionLabel, 4);
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Lowering Routines
+//===----------------------------------------------------------------------===//
+
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves,
+                                MCSymbol *BaseLabel, bool isEH) const {
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  
+  int stackGrowth = TM.getTargetData()->getPointerSize();
+  if (TM.getFrameInfo()->getStackGrowthDirection() !=
+      TargetFrameInfo::StackGrowsUp)
+    stackGrowth *= -1;
+  
+  for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+    const MachineMove &Move = Moves[i];
+    MCSymbol *Label = Move.getLabel();
+    // Throw out move if the label is invalid.
+    if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+    
+    const MachineLocation &Dst = Move.getDestination();
+    const MachineLocation &Src = Move.getSource();
+    
+    // Advance row if new location.
+    if (BaseLabel && Label) {
+      MCSymbol *ThisSym = Label;
+      if (ThisSym != BaseLabel) {
+        EmitCFAByte(dwarf::DW_CFA_advance_loc4);
+        EmitLabelDifference(ThisSym, BaseLabel, 4);
+        BaseLabel = ThisSym;
+      }
+    }
+    
+    // If advancing cfa.
+    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+      assert(!Src.isReg() && "Machine move not supported yet.");
+      
+      if (Src.getReg() == MachineLocation::VirtualFP) {
+        EmitCFAByte(dwarf::DW_CFA_def_cfa_offset);
+      } else {
+        EmitCFAByte(dwarf::DW_CFA_def_cfa);
+        EmitULEB128(RI->getDwarfRegNum(Src.getReg(), isEH), "Register");
+      }
+      
+      EmitULEB128(-Src.getOffset(), "Offset");
+      continue;
+    }
+    
+    if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+      assert(Dst.isReg() && "Machine move not supported yet.");
+      EmitCFAByte(dwarf::DW_CFA_def_cfa_register);
+      EmitULEB128(RI->getDwarfRegNum(Dst.getReg(), isEH), "Register");
+      continue;
+    }
+    
+    unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
+    int Offset = Dst.getOffset() / stackGrowth;
+    
+    if (Offset < 0) {
+      EmitCFAByte(dwarf::DW_CFA_offset_extended_sf);
+      EmitULEB128(Reg, "Reg");
+      EmitSLEB128(Offset, "Offset");
+    } else if (Reg < 64) {
+      EmitCFAByte(dwarf::DW_CFA_offset + Reg);
+      EmitULEB128(Offset, "Offset");
+    } else {
+      EmitCFAByte(dwarf::DW_CFA_offset_extended);
+      EmitULEB128(Reg, "Reg");
+      EmitULEB128(Offset, "Offset");
+    }
+  }
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
new file mode 100644
index 0000000..37d10e5
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -0,0 +1,370 @@
+//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the inline assembler pieces of the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCParser/AsmParser.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
+  assert(!Str.empty() && "Can't emit empty inline asm block");
+  
+  // Remember if the buffer is nul terminated or not so we can avoid a copy.
+  bool isNullTerminated = Str.back() == 0;
+  if (isNullTerminated)
+    Str = Str.substr(0, Str.size()-1);
+  
+  // If the output streamer is actually a .s file, just emit the blob textually.
+  // This is useful in case the asm parser doesn't handle something but the
+  // system assembler does.
+  if (OutStreamer.hasRawTextSupport()) {
+    OutStreamer.EmitRawText(Str);
+    return;
+  }
+  
+  SourceMgr SrcMgr;
+  
+  // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+  LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+  bool HasDiagHandler = false;
+  if (void *DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler()) {
+    SrcMgr.setDiagHandler((SourceMgr::DiagHandlerTy)(intptr_t)DiagHandler,
+                          LLVMCtx.getInlineAsmDiagnosticContext(), LocCookie);
+    HasDiagHandler = true;
+  }
+  
+  MemoryBuffer *Buffer;
+  if (isNullTerminated)
+    Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
+  else
+    Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+
+  // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+  
+  AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI);
+  OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser));
+  if (!TAP)
+    report_fatal_error("Inline asm not supported by this streamer because"
+                       " we don't have an asm parser for this target\n");
+  Parser.setTargetParser(*TAP.get());
+
+  // Don't implicitly switch to the text section before the asm.
+  int Res = Parser.Run(/*NoInitialTextSection*/ true,
+                       /*NoFinalize*/ true);
+  if (Res && !HasDiagHandler)
+    report_fatal_error("Error parsing inline asm\n");
+}
+
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+  assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+  
+  unsigned NumOperands = MI->getNumOperands();
+  
+  // Count the number of register definitions to find the asm string.
+  unsigned NumDefs = 0;
+  for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+       ++NumDefs)
+    assert(NumDefs != NumOperands-2 && "No asm string?");
+  
+  assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+  // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+  const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+  // If this asmstr is empty, just print the #APP/#NOAPP markers.
+  // These are useful to see where empty asm's wound up.
+  if (AsmStr[0] == 0) {
+    // Don't emit the comments if writing to a .o file.
+    if (!OutStreamer.hasRawTextSupport()) return;
+
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmStart());
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmEnd());
+    return;
+  }
+
+  // Emit the #APP start marker.  This has to happen even if verbose-asm isn't
+  // enabled, so we use EmitRawText.
+  if (OutStreamer.hasRawTextSupport())
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmStart());
+
+  // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+  // it.
+  unsigned LocCookie = 0;
+  for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+    if (MI->getOperand(i-1).isMetadata())
+      if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata())
+        if (SrcLoc->getNumOperands() != 0)
+          if (const ConstantInt *CI =
+              dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) {
+            LocCookie = CI->getZExtValue();
+            break;
+          }
+  }
+  
+  // Emit the inline asm to a temporary string so we can emit it through
+  // EmitInlineAsm.
+  SmallString<256> StringData;
+  raw_svector_ostream OS(StringData);
+  
+  OS << '\t';
+
+  // The variant of the current asmprinter.
+  int AsmPrinterVariant = MAI->getAssemblerDialect();
+
+  int CurVariant = -1;            // The number of the {.|.|.} region we are in.
+  const char *LastEmitted = AsmStr; // One past the last character emitted.
+  
+  while (*LastEmitted) {
+    switch (*LastEmitted) {
+    default: {
+      // Not a special case, emit the string section literally.
+      const char *LiteralEnd = LastEmitted+1;
+      while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+             *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+        ++LiteralEnd;
+      if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+        OS.write(LastEmitted, LiteralEnd-LastEmitted);
+      LastEmitted = LiteralEnd;
+      break;
+    }
+    case '\n':
+      ++LastEmitted;   // Consume newline character.
+      OS << '\n';      // Indent code with newline.
+      break;
+    case '$': {
+      ++LastEmitted;   // Consume '$' character.
+      bool Done = true;
+
+      // Handle escapes.
+      switch (*LastEmitted) {
+      default: Done = false; break;
+      case '$':     // $$ -> $
+        if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+          OS << '$';
+        ++LastEmitted;  // Consume second '$' character.
+        break;
+      case '(':             // $( -> same as GCC's { character.
+        ++LastEmitted;      // Consume '(' character.
+        if (CurVariant != -1)
+          report_fatal_error("Nested variants found in inline asm string: '" +
+                             Twine(AsmStr) + "'");
+        CurVariant = 0;     // We're in the first variant now.
+        break;
+      case '|':
+        ++LastEmitted;  // consume '|' character.
+        if (CurVariant == -1)
+          OS << '|';       // this is gcc's behavior for | outside a variant
+        else
+          ++CurVariant;   // We're in the next variant.
+        break;
+      case ')':         // $) -> same as GCC's } char.
+        ++LastEmitted;  // consume ')' character.
+        if (CurVariant == -1)
+          OS << '}';     // this is gcc's behavior for } outside a variant
+        else 
+          CurVariant = -1;
+        break;
+      }
+      if (Done) break;
+      
+      bool HasCurlyBraces = false;
+      if (*LastEmitted == '{') {     // ${variable}
+        ++LastEmitted;               // Consume '{' character.
+        HasCurlyBraces = true;
+      }
+      
+      // If we have ${:foo}, then this is not a real operand reference, it is a
+      // "magic" string reference, just like in .td files.  Arrange to call
+      // PrintSpecial.
+      if (HasCurlyBraces && *LastEmitted == ':') {
+        ++LastEmitted;
+        const char *StrStart = LastEmitted;
+        const char *StrEnd = strchr(StrStart, '}');
+        if (StrEnd == 0)
+          report_fatal_error("Unterminated ${:foo} operand in inline asm"
+                             " string: '" + Twine(AsmStr) + "'");
+        
+        std::string Val(StrStart, StrEnd);
+        PrintSpecial(MI, OS, Val.c_str());
+        LastEmitted = StrEnd+1;
+        break;
+      }
+            
+      const char *IDStart = LastEmitted;
+      const char *IDEnd = IDStart;
+      while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;      
+      
+      unsigned Val;
+      if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+        report_fatal_error("Bad $ operand number in inline asm string: '" +
+                           Twine(AsmStr) + "'");
+      LastEmitted = IDEnd;
+      
+      char Modifier[2] = { 0, 0 };
+      
+      if (HasCurlyBraces) {
+        // If we have curly braces, check for a modifier character.  This
+        // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
+        if (*LastEmitted == ':') {
+          ++LastEmitted;    // Consume ':' character.
+          if (*LastEmitted == 0)
+            report_fatal_error("Bad ${:} expression in inline asm string: '" +
+                               Twine(AsmStr) + "'");
+          
+          Modifier[0] = *LastEmitted;
+          ++LastEmitted;    // Consume modifier character.
+        }
+        
+        if (*LastEmitted != '}')
+          report_fatal_error("Bad ${} expression in inline asm string: '" +
+                             Twine(AsmStr) + "'");
+        ++LastEmitted;    // Consume '}' character.
+      }
+      
+      if (Val >= NumOperands-1)
+        report_fatal_error("Invalid $ operand number in inline asm string: '" +
+                           Twine(AsmStr) + "'");
+      
+      // Okay, we finally have a value number.  Ask the target to print this
+      // operand!
+      if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+        unsigned OpNo = 1;
+
+        bool Error = false;
+
+        // Scan to find the machine operand number for the operand.
+        for (; Val; --Val) {
+          if (OpNo >= MI->getNumOperands()) break;
+          unsigned OpFlags = MI->getOperand(OpNo).getImm();
+          OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+        }
+
+        if (OpNo >= MI->getNumOperands()) {
+          Error = true;
+        } else {
+          unsigned OpFlags = MI->getOperand(OpNo).getImm();
+          ++OpNo;  // Skip over the ID number.
+
+          if (Modifier[0] == 'l')  // labels are target independent
+            // FIXME: What if the operand isn't an MBB, report error?
+            OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
+          else {
+            AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+            if (InlineAsm::isMemKind(OpFlags)) {
+              Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+                                                Modifier[0] ? Modifier : 0,
+                                                OS);
+            } else {
+              Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+                                          Modifier[0] ? Modifier : 0, OS);
+            }
+          }
+        }
+        if (Error) {
+          std::string msg;
+          raw_string_ostream Msg(msg);
+          Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+          MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+        }
+      }
+      break;
+    }
+    }
+  }
+  OS << '\n' << (char)0;  // null terminate string.
+  EmitInlineAsm(OS.str(), LocCookie);
+  
+  // Emit the #NOAPP end marker.  This has to happen even if verbose-asm isn't
+  // enabled, so we use EmitRawText.
+  if (OutStreamer.hasRawTextSupport())
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmEnd());
+}
+
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself.  This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings.  The
+/// syntax used is ${:comment}.  Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+                              const char *Code) const {
+  if (!strcmp(Code, "private")) {
+    OS << MAI->getPrivateGlobalPrefix();
+  } else if (!strcmp(Code, "comment")) {
+    OS << MAI->getCommentString();
+  } else if (!strcmp(Code, "uid")) {
+    // Comparing the address of MI isn't sufficient, because machineinstrs may
+    // be allocated to the same address across functions.
+    
+    // If this is a new LastFn instruction, bump the counter.
+    if (LastMI != MI || LastFn != getFunctionNumber()) {
+      ++Counter;
+      LastMI = MI;
+      LastFn = getFunctionNumber();
+    }
+    OS << Counter;
+  } else {
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Unknown special formatter '" << Code
+         << "' for machine instr: " << *MI;
+    report_fatal_error(Msg.str());
+  }    
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant.  Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                 unsigned AsmVariant, const char *ExtraCode,
+                                 raw_ostream &O) {
+  // Target doesn't support this yet!
+  return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                       unsigned AsmVariant,
+                                       const char *ExtraCode, raw_ostream &O) {
+  // Target doesn't support this yet!
+  return true;
+}
+
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 066aaab..ca8b843 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -1,10 +1,11 @@
 add_llvm_library(LLVMAsmPrinter
   AsmPrinter.cpp
+  AsmPrinterDwarf.cpp
+  AsmPrinterInlineAsm.cpp
   DIE.cpp
   DwarfDebug.cpp
   DwarfException.cpp
-  DwarfLabel.cpp
-  DwarfPrinter.cpp
-  DwarfWriter.cpp
   OcamlGCPrinter.cpp
   )
+
+target_link_libraries (LLVMAsmPrinter LLVMMCParser)
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 63360c0..b2c70d5 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -12,13 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "DIE.h"
-#include "DwarfPrinter.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
@@ -53,14 +53,14 @@
 
 /// Emit - Print the abbreviation using the specified asm printer.
 ///
-void DIEAbbrev::Emit(const DwarfPrinter *DP) const {
+void DIEAbbrev::Emit(AsmPrinter *AP) const {
   // Emit its Dwarf tag type.
   // FIXME: Doing work even in non-asm-verbose runs.
-  DP->EmitULEB128(Tag, dwarf::TagString(Tag));
+  AP->EmitULEB128(Tag, dwarf::TagString(Tag));
 
   // Emit whether it has children DIEs.
   // FIXME: Doing work even in non-asm-verbose runs.
-  DP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
+  AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
 
   // For each attribute description.
   for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -68,18 +68,18 @@
 
     // Emit attribute type.
     // FIXME: Doing work even in non-asm-verbose runs.
-    DP->EmitULEB128(AttrData.getAttribute(),
-                    dwarf::AttributeString(AttrData.getAttribute()));
+    AP->EmitULEB128(AttrData.getAttribute(),
+                              dwarf::AttributeString(AttrData.getAttribute()));
 
     // Emit form type.
     // FIXME: Doing work even in non-asm-verbose runs.
-    DP->EmitULEB128(AttrData.getForm(),
+    AP->EmitULEB128(AttrData.getForm(),
                     dwarf::FormEncodingString(AttrData.getForm()));
   }
 
   // Mark end of abbreviation.
-  DP->EmitULEB128(0, "EOM(1)");
-  DP->EmitULEB128(0, "EOM(2)");
+  AP->EmitULEB128(0, "EOM(1)");
+  AP->EmitULEB128(0, "EOM(2)");
 }
 
 #ifndef NDEBUG
@@ -114,10 +114,11 @@
 
 /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
 ///
-void DIE::addSiblingOffset() {
-  DIEInteger *DI = new DIEInteger(0);
+DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) {
+  DIEInteger *DI = new (A) DIEInteger(0);
   Values.insert(Values.begin(), DI);
   Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+  return DI;
 }
 
 #ifndef NDEBUG
@@ -131,17 +132,15 @@
       << "Die: "
       << format("0x%lx", (long)(intptr_t)this)
       << ", Offset: " << Offset
-      << ", Size: " << Size
-      << "\n";
+      << ", Size: " << Size << "\n";
 
     O << Indent
       << dwarf::TagString(Abbrev.getTag())
       << " "
-      << dwarf::ChildrenString(Abbrev.getChildrenFlag());
+      << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n";
   } else {
-    O << "Size: " << Size;
+    O << "Size: " << Size << "\n";
   }
-  O << "\n";
 
   const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
 
@@ -188,8 +187,7 @@
 
 /// EmitValue - Emit integer of appropriate size.
 ///
-void DIEInteger::EmitValue(DwarfPrinter *D, unsigned Form) const {
-  const AsmPrinter *Asm = D->getAsm();
+void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
   unsigned Size = ~0U;
   switch (Form) {
   case dwarf::DW_FORM_flag:  // Fall thru
@@ -201,8 +199,8 @@
   case dwarf::DW_FORM_data4: Size = 4; break;
   case dwarf::DW_FORM_ref8:  // Fall thru
   case dwarf::DW_FORM_data8: Size = 8; break;
-  case dwarf::DW_FORM_udata: D->EmitULEB128(Integer); return;
-  case dwarf::DW_FORM_sdata: D->EmitSLEB128(Integer, ""); return;
+  case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
+  case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
   default: llvm_unreachable("DIE Value form not supported yet");
   }
   Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
@@ -210,7 +208,7 @@
 
 /// SizeOf - Determine size of integer value in bytes.
 ///
-unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const {
+unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
   switch (Form) {
   case dwarf::DW_FORM_flag:  // Fall thru
   case dwarf::DW_FORM_ref1:  // Fall thru
@@ -241,10 +239,10 @@
 
 /// EmitValue - Emit string value.
 ///
-void DIEString::EmitValue(DwarfPrinter *D, unsigned Form) const {
-  D->getAsm()->OutStreamer.EmitBytes(Str, /*addrspace*/0);
+void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const {
+  AP->OutStreamer.EmitBytes(Str, /*addrspace*/0);
   // Emit nul terminator.
-  D->getAsm()->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0);
+  AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0);
 }
 
 #ifndef NDEBUG
@@ -254,82 +252,25 @@
 #endif
 
 //===----------------------------------------------------------------------===//
-// DIEDwarfLabel Implementation
+// DIELabel Implementation
 //===----------------------------------------------------------------------===//
 
 /// EmitValue - Emit label value.
 ///
-void DIEDwarfLabel::EmitValue(DwarfPrinter *D, unsigned Form) const {
-  bool IsSmall = Form == dwarf::DW_FORM_data4;
-  D->EmitReference(Label, false, IsSmall);
+void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
+  AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/);
 }
 
 /// SizeOf - Determine size of label value in bytes.
 ///
-unsigned DIEDwarfLabel::SizeOf(const TargetData *TD, unsigned Form) const {
+unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
-  return TD->getPointerSize();
+  return AP->getTargetData().getPointerSize();
 }
 
 #ifndef NDEBUG
-void DIEDwarfLabel::print(raw_ostream &O) {
-  O << "Lbl: ";
-  Label.print(O);
-}
-#endif
-
-//===----------------------------------------------------------------------===//
-// DIEObjectLabel Implementation
-//===----------------------------------------------------------------------===//
-
-/// EmitValue - Emit label value.
-///
-void DIEObjectLabel::EmitValue(DwarfPrinter *D, unsigned Form) const {
-  bool IsSmall = Form == dwarf::DW_FORM_data4;
-  D->EmitReference(Sym, false, IsSmall);
-}
-
-/// SizeOf - Determine size of label value in bytes.
-///
-unsigned DIEObjectLabel::SizeOf(const TargetData *TD, unsigned Form) const {
-  if (Form == dwarf::DW_FORM_data4) return 4;
-  return TD->getPointerSize();
-}
-
-#ifndef NDEBUG
-void DIEObjectLabel::print(raw_ostream &O) {
-  O << "Obj: " << Sym->getName();
-}
-#endif
-
-//===----------------------------------------------------------------------===//
-// DIESectionOffset Implementation
-//===----------------------------------------------------------------------===//
-
-/// EmitValue - Emit delta value.
-///
-void DIESectionOffset::EmitValue(DwarfPrinter *D, unsigned Form) const {
-  bool IsSmall = Form == dwarf::DW_FORM_data4;
-  D->EmitSectionOffset(Label.getTag(), Section.getTag(),
-                       Label.getNumber(), Section.getNumber(),
-                       IsSmall, IsEH, UseSet);
-  D->getAsm()->O << '\n'; // FIXME: Necesssary?
-}
-
-/// SizeOf - Determine size of delta value in bytes.
-///
-unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const {
-  if (Form == dwarf::DW_FORM_data4) return 4;
-  return TD->getPointerSize();
-}
-
-#ifndef NDEBUG
-void DIESectionOffset::print(raw_ostream &O) {
-  O << "Off: ";
-  Label.print(O);
-  O << "-";
-  Section.print(O);
-  O << "-" << IsEH << "-" << UseSet;
+void DIELabel::print(raw_ostream &O) {
+  O << "Lbl: " << Label->getName();
 }
 #endif
 
@@ -339,24 +280,20 @@
 
 /// EmitValue - Emit delta value.
 ///
-void DIEDelta::EmitValue(DwarfPrinter *D, unsigned Form) const {
-  bool IsSmall = Form == dwarf::DW_FORM_data4;
-  D->EmitDifference(LabelHi, LabelLo, IsSmall);
+void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
+  AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
 }
 
 /// SizeOf - Determine size of delta value in bytes.
 ///
-unsigned DIEDelta::SizeOf(const TargetData *TD, unsigned Form) const {
+unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
-  return TD->getPointerSize();
+  return AP->getTargetData().getPointerSize();
 }
 
 #ifndef NDEBUG
 void DIEDelta::print(raw_ostream &O) {
-  O << "Del: ";
-  LabelHi.print(O);
-  O << "-";
-  LabelLo.print(O);
+  O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
 }
 #endif
 
@@ -366,8 +303,8 @@
 
 /// EmitValue - Emit debug information entry offset.
 ///
-void DIEEntry::EmitValue(DwarfPrinter *D, unsigned Form) const {
-  D->getAsm()->EmitInt32(Entry->getOffset());
+void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const {
+  AP->EmitInt32(Entry->getOffset());
 }
 
 #ifndef NDEBUG
@@ -382,11 +319,11 @@
 
 /// ComputeSize - calculate the size of the block.
 ///
-unsigned DIEBlock::ComputeSize(const TargetData *TD) {
+unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
   if (!Size) {
     const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
     for (unsigned i = 0, N = Values.size(); i < N; ++i)
-      Size += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+      Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
   }
 
   return Size;
@@ -394,31 +331,28 @@
 
 /// EmitValue - Emit block data.
 ///
-void DIEBlock::EmitValue(DwarfPrinter *D, unsigned Form) const {
-  const AsmPrinter *Asm = D->getAsm();
+void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
   switch (Form) {
-  case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);         break;
-  case dwarf::DW_FORM_block2: Asm->EmitInt16(Size);        break;
-  case dwarf::DW_FORM_block4: Asm->EmitInt32(Size);        break;
-  case dwarf::DW_FORM_block:  D->EmitULEB128(Size); break;
-  default: llvm_unreachable("Improper form for block");         break;
+  default: assert(0 && "Improper form for block");    break;
+  case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);    break;
+  case dwarf::DW_FORM_block2: Asm->EmitInt16(Size);   break;
+  case dwarf::DW_FORM_block4: Asm->EmitInt32(Size);   break;
+  case dwarf::DW_FORM_block:  Asm->EmitULEB128(Size); break;
   }
 
   const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
-  for (unsigned i = 0, N = Values.size(); i < N; ++i) {
-    Asm->O << '\n';
-    Values[i]->EmitValue(D, AbbrevData[i].getForm());
-  }
+  for (unsigned i = 0, N = Values.size(); i < N; ++i)
+    Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
 }
 
 /// SizeOf - Determine size of block data in bytes.
 ///
-unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const {
+unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
   switch (Form) {
   case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
   case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
   case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
-  case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
+  case dwarf::DW_FORM_block:  return Size + MCAsmInfo::getULEB128Size(Size);
   default: llvm_unreachable("Improper form for block"); break;
   }
   return 0;
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index af90289..9cb8314 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -14,7 +14,6 @@
 #ifndef CODEGEN_ASMPRINTER_DIE_H__
 #define CODEGEN_ASMPRINTER_DIE_H__
 
-#include "DwarfLabel.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
@@ -23,9 +22,8 @@
 
 namespace llvm {
   class AsmPrinter;
-  class DwarfPrinter;
-  class TargetData;
   class MCSymbol;
+  class raw_ostream;
 
   //===--------------------------------------------------------------------===//
   /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
@@ -101,7 +99,7 @@
 
     /// Emit - Print the abbreviation using the specified asm printer.
     ///
-    void Emit(const DwarfPrinter *DP) const;
+    void Emit(AsmPrinter *AP) const;
 
 #ifndef NDEBUG
     void print(raw_ostream &O);
@@ -112,7 +110,6 @@
   //===--------------------------------------------------------------------===//
   /// DIE - A structured debug information entry.  Has an abbreviation which
   /// describes it's organization.
-  class CompileUnit;
   class DIEValue;
 
   class DIE {
@@ -154,12 +151,11 @@
     unsigned getOffset() const { return Offset; }
     unsigned getSize() const { return Size; }
     const std::vector<DIE *> &getChildren() const { return Children; }
-    SmallVector<DIEValue*, 32> &getValues() { return Values; }
+    const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
     DIE *getParent() const { return Parent; }
     void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
     void setOffset(unsigned O) { Offset = O; }
     void setSize(unsigned S) { Size = S; }
-    void setParent(DIE *P) { Parent = P; }
     
     /// addValue - Add a value and attributes to a DIE.
     ///
@@ -173,8 +169,10 @@
     unsigned getSiblingOffset() const { return Offset + Size; }
 
     /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+    /// The caller is responsible for deleting the return value at or after the
+    /// same time it destroys this DIE.
     ///
-    void addSiblingOffset();
+    DIEValue *addSiblingOffset(BumpPtrAllocator &A);
 
     /// addChild - Add a child to the DIE.
     ///
@@ -185,7 +183,7 @@
       }
       Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
       Children.push_back(Child);
-      Child->setParent(this);
+      Child->Parent = this;
     }
 
 #ifndef NDEBUG
@@ -203,7 +201,6 @@
       isInteger,
       isString,
       isLabel,
-      isAsIsLabel,
       isSectionOffset,
       isDelta,
       isEntry,
@@ -222,11 +219,11 @@
 
     /// EmitValue - Emit value via the Dwarf writer.
     ///
-    virtual void EmitValue(DwarfPrinter *D, unsigned Form) const = 0;
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0;
 
     /// SizeOf - Return the size of a value in bytes.
     ///
-    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const = 0;
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
 
     // Implement isa/cast/dyncast.
     static bool classof(const DIEValue *) { return true; }
@@ -262,12 +259,13 @@
 
     /// EmitValue - Emit integer of appropriate size.
     ///
-    virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+    uint64_t getValue() const { return Integer; }
 
     /// SizeOf - Determine size of integer value in bytes.
     ///
-    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
-
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
 
     // Implement isa/cast/dyncast.
     static bool classof(const DIEInteger *) { return true; }
@@ -288,11 +286,11 @@
 
     /// EmitValue - Emit string value.
     ///
-    virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
 
     /// SizeOf - Determine size of string value in bytes.
     ///
-    virtual unsigned SizeOf(const TargetData *, unsigned /*Form*/) const {
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const {
       return Str.size() + sizeof(char); // sizeof('\0');
     }
 
@@ -306,23 +304,23 @@
   };
 
   //===--------------------------------------------------------------------===//
-  /// DIEDwarfLabel - A Dwarf internal label expression DIE.
+  /// DIELabel - A label expression DIE.
   //
-  class DIEDwarfLabel : public DIEValue {
-    const DWLabel Label;
+  class DIELabel : public DIEValue {
+    const MCSymbol *Label;
   public:
-    explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
+    explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
 
     /// EmitValue - Emit label value.
     ///
-    virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
 
     /// SizeOf - Determine size of label value in bytes.
     ///
-    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
 
     // Implement isa/cast/dyncast.
-    static bool classof(const DIEDwarfLabel *)  { return true; }
+    static bool classof(const DIELabel *)  { return true; }
     static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
 
 #ifndef NDEBUG
@@ -331,83 +329,22 @@
   };
 
   //===--------------------------------------------------------------------===//
-  /// DIEObjectLabel - A label to an object in code or data.
-  //
-  class DIEObjectLabel : public DIEValue {
-    const MCSymbol *Sym;
-  public:
-    explicit DIEObjectLabel(const MCSymbol *S)
-      : DIEValue(isAsIsLabel), Sym(S) {}
-
-    /// EmitValue - Emit label value.
-    ///
-    virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
-
-    /// SizeOf - Determine size of label value in bytes.
-    ///
-    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
-
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIEObjectLabel *) { return true; }
-    static bool classof(const DIEValue *L) {
-      return L->getType() == isAsIsLabel;
-    }
-
-#ifndef NDEBUG
-    virtual void print(raw_ostream &O);
-#endif
-  };
-
-  //===--------------------------------------------------------------------===//
-  /// DIESectionOffset - A section offset DIE.
-  ///
-  class DIESectionOffset : public DIEValue {
-    const DWLabel Label;
-    const DWLabel Section;
-    bool IsEH : 1;
-    bool UseSet : 1;
-  public:
-    DIESectionOffset(const DWLabel &Lab, const DWLabel &Sec,
-                     bool isEH = false, bool useSet = true)
-      : DIEValue(isSectionOffset), Label(Lab), Section(Sec),
-        IsEH(isEH), UseSet(useSet) {}
-
-    /// EmitValue - Emit section offset.
-    ///
-    virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
-
-    /// SizeOf - Determine size of section offset value in bytes.
-    ///
-    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
-
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIESectionOffset *)  { return true; }
-    static bool classof(const DIEValue *D) {
-      return D->getType() == isSectionOffset;
-    }
-
-#ifndef NDEBUG
-    virtual void print(raw_ostream &O);
-#endif
-  };
-
-  //===--------------------------------------------------------------------===//
   /// DIEDelta - A simple label difference DIE.
   ///
   class DIEDelta : public DIEValue {
-    const DWLabel LabelHi;
-    const DWLabel LabelLo;
+    const MCSymbol *LabelHi;
+    const MCSymbol *LabelLo;
   public:
-    DIEDelta(const DWLabel &Hi, const DWLabel &Lo)
+    DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
       : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
 
     /// EmitValue - Emit delta value.
     ///
-    virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
 
     /// SizeOf - Determine size of delta value in bytes.
     ///
-    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
 
     // Implement isa/cast/dyncast.
     static bool classof(const DIEDelta *)  { return true; }
@@ -423,20 +360,19 @@
   /// this class can also be used as a proxy for a debug information entry not
   /// yet defined (ie. types.)
   class DIEEntry : public DIEValue {
-    DIE *Entry;
+    DIE *const Entry;
   public:
     explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
 
     DIE *getEntry() const { return Entry; }
-    void setEntry(DIE *E) { Entry = E; }
 
     /// EmitValue - Emit debug information entry offset.
     ///
-    virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
 
     /// SizeOf - Determine size of debug information entry in bytes.
     ///
-    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const {
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const {
       return sizeof(int32_t);
     }
 
@@ -461,7 +397,7 @@
 
     /// ComputeSize - calculate the size of the block.
     ///
-    unsigned ComputeSize(const TargetData *TD);
+    unsigned ComputeSize(AsmPrinter *AP);
 
     /// BestForm - Choose the best form for data.
     ///
@@ -474,11 +410,11 @@
 
     /// EmitValue - Emit block data.
     ///
-    virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
 
     /// SizeOf - Determine size of block data in bytes.
     ///
-    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
 
     // Implement isa/cast/dyncast.
     static bool classof(const DIEBlock *)  { return true; }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5ad1e5e..3ce0711 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -10,19 +10,27 @@
 // This file contains support for writing dwarf debug info into asm files.
 //
 //===----------------------------------------------------------------------===//
+
 #define DEBUG_TYPE "dwarfdebug"
 #include "DwarfDebug.h"
+#include "DIE.h"
+#include "llvm/Constants.h"
 #include "llvm/Module.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -32,6 +40,11 @@
 #include "llvm/System/Path.h"
 using namespace llvm;
 
+namespace {
+  const char *DWARFGroupName = "DWARF Emission";
+  const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
 //===----------------------------------------------------------------------===//
 
 /// Configuration values for initial hash set sizes (log2).
@@ -50,9 +63,9 @@
 
   /// Die - Compile unit debug information entry.
   ///
-  DIE *CUDie;
+  const OwningPtr<DIE> CUDie;
 
-  /// IndexTyDie - An anonymous type for index type.
+  /// IndexTyDie - An anonymous type for index type.  Owned by CUDie.
   DIE *IndexTyDie;
 
   /// GVToDieMap - Tracks the mapping of unit level debug informaton
@@ -76,11 +89,10 @@
 public:
   CompileUnit(unsigned I, DIE *D)
     : ID(I), CUDie(D), IndexTyDie(0) {}
-  ~CompileUnit() { delete CUDie; delete IndexTyDie; }
 
   // Accessors.
   unsigned getID()                  const { return ID; }
-  DIE* getCUDie()                   const { return CUDie; }
+  DIE* getCUDie()                   const { return CUDie.get(); }
   const StringMap<DIE*> &getGlobals()     const { return Globals; }
   const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
 
@@ -90,11 +102,11 @@
 
   /// addGlobal - Add a new global entity to the compile unit.
   ///
-  void addGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+  void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
 
   /// addGlobalType - Add a new global type to the compile unit.
   ///
-  void addGlobalType(const std::string &Name, DIE *Die) { 
+  void addGlobalType(StringRef Name, DIE *Die) { 
     GlobalTypes[Name] = Die; 
   }
 
@@ -146,16 +158,27 @@
 class DbgVariable {
   DIVariable Var;                    // Variable Descriptor.
   unsigned FrameIndex;               // Variable frame index.
-  DbgVariable *AbstractVar;          // Abstract variable for this variable.
+  const MachineInstr *DbgValueMInsn; // DBG_VALUE
+  // DbgValueLabel - DBG_VALUE is effective from this label.
+  MCSymbol *DbgValueLabel;
+  DbgVariable *const AbstractVar;    // Abstract variable for this variable.
   DIE *TheDIE;
 public:
-  DbgVariable(DIVariable V, unsigned I)
-    : Var(V), FrameIndex(I), AbstractVar(0), TheDIE(0)  {}
+  // AbsVar may be NULL.
+  DbgVariable(DIVariable V, unsigned I, DbgVariable *AbsVar)
+    : Var(V), FrameIndex(I), DbgValueMInsn(0), 
+      DbgValueLabel(0), AbstractVar(AbsVar), TheDIE(0) {}
+  DbgVariable(DIVariable V, const MachineInstr *MI, DbgVariable *AbsVar)
+    : Var(V), FrameIndex(0), DbgValueMInsn(MI), DbgValueLabel(0),
+      AbstractVar(AbsVar), TheDIE(0)
+    {}
 
   // Accessors.
   DIVariable getVariable()           const { return Var; }
   unsigned getFrameIndex()           const { return FrameIndex; }
-  void setAbstractVariable(DbgVariable *V) { AbstractVar = V; }
+  const MachineInstr *getDbgValue()  const { return DbgValueMInsn; }
+  MCSymbol *getDbgValueLabel()       const { return DbgValueLabel; }
+  void setDbgValueLabel(MCSymbol *L)       { DbgValueLabel = L; }
   DbgVariable *getAbstractVariable() const { return AbstractVar; }
   void setDIE(DIE *D)                      { TheDIE = D; }
   DIE *getDIE()                      const { return TheDIE; }
@@ -170,19 +193,18 @@
   // Location at which this scope is inlined.
   AssertingVH<MDNode> InlinedAtLocation;  
   bool AbstractScope;                 // Abstract Scope
-  unsigned StartLabelID;              // Label ID of the beginning of scope.
-  unsigned EndLabelID;                // Label ID of the end of scope.
   const MachineInstr *LastInsn;       // Last instruction of this scope.
   const MachineInstr *FirstInsn;      // First instruction of this scope.
-  SmallVector<DbgScope *, 4> Scopes;  // Scopes defined in scope.
-  SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
+  // Scopes defined in scope.  Contents not owned.
+  SmallVector<DbgScope *, 4> Scopes;
+  // Variables declared in scope.  Contents owned.
+  SmallVector<DbgVariable *, 8> Variables;
 
   // Private state for dump()
   mutable unsigned IndentLevel;
 public:
   DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
     : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
-      StartLabelID(0), EndLabelID(0),
       LastInsn(0), FirstInsn(0), IndentLevel(0) {}
   virtual ~DbgScope();
 
@@ -190,16 +212,10 @@
   DbgScope *getParent()          const { return Parent; }
   void setParent(DbgScope *P)          { Parent = P; }
   DIDescriptor getDesc()         const { return Desc; }
-  MDNode *getInlinedAt()         const {
-    return InlinedAtLocation;
-  }
+  MDNode *getInlinedAt()         const { return InlinedAtLocation; }
   MDNode *getScopeNode()         const { return Desc.getNode(); }
-  unsigned getStartLabelID()     const { return StartLabelID; }
-  unsigned getEndLabelID()       const { return EndLabelID; }
-  SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
-  SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
-  void setStartLabelID(unsigned S) { StartLabelID = S; }
-  void setEndLabelID(unsigned E)   { EndLabelID = E; }
+  const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
+  const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
   void setLastInsn(const MachineInstr *MI) { LastInsn = MI; }
   const MachineInstr *getLastInsn()      { return LastInsn; }
   void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; }
@@ -217,17 +233,17 @@
 
   void fixInstructionMarkers(DenseMap<const MachineInstr *, 
                              unsigned> &MIIndexMap) {
-    assert (getFirstInsn() && "First instruction is missing!");
+    assert(getFirstInsn() && "First instruction is missing!");
     
     // Use the end of last child scope as end of this scope.
-    SmallVector<DbgScope *, 4> &Scopes = getScopes();
+    const SmallVector<DbgScope *, 4> &Scopes = getScopes();
     const MachineInstr *LastInsn = getFirstInsn();
     unsigned LIndex = 0;
     if (Scopes.empty()) {
-      assert (getLastInsn() && "Inner most scope does not have last insn!");
+      assert(getLastInsn() && "Inner most scope does not have last insn!");
       return;
     }
-    for (SmallVector<DbgScope *, 4>::iterator SI = Scopes.begin(),
+    for (SmallVector<DbgScope *, 4>::const_iterator SI = Scopes.begin(),
            SE = Scopes.end(); SI != SE; ++SI) {
       DbgScope *DS = *SI;
       DS->fixInstructionMarkers(MIIndexMap);
@@ -256,6 +272,8 @@
   void dump() const;
 #endif
 };
+  
+} // end llvm namespace
 
 #ifndef NDEBUG
 void DbgScope::dump() const {
@@ -263,7 +281,6 @@
   err.indent(IndentLevel);
   MDNode *N = Desc.getNode();
   N->dump();
-  err << " [" << StartLabelID << ", " << EndLabelID << "]\n";
   if (AbstractScope)
     err << "Abstract Scope\n";
 
@@ -279,30 +296,40 @@
 #endif
 
 DbgScope::~DbgScope() {
-  for (unsigned i = 0, N = Scopes.size(); i < N; ++i)
-    delete Scopes[i];
   for (unsigned j = 0, M = Variables.size(); j < M; ++j)
     delete Variables[j];
 }
 
-} // end llvm namespace
+DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+  : Asm(A), MMI(Asm->MMI), ModuleCU(0),
+    AbbreviationsSet(InitAbbreviationsSetSize), 
+    CurrentFnDbgScope(0), PrevLabel(NULL) {
+  NextStringPoolNumber = 0;
+      
+  DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
+  DwarfStrSectionSym = TextSectionSym = 0;
 
-DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
-  : DwarfPrinter(OS, A, T, "dbg"), ModuleCU(0),
-    AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
-    DIEValues(), StringPool(),
-    SectionSourceLines(), didInitial(false), shouldEmit(false),
-    CurrentFnDbgScope(0), DebugTimer(0) {
-  if (TimePassesIsEnabled)
-    DebugTimer = new Timer("Dwarf Debug Writer");
+  if (TimePassesIsEnabled) {
+      NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+      beginModule(M);
+  } else {
+      beginModule(M);
+  }
 }
 DwarfDebug::~DwarfDebug() {
-  for (unsigned j = 0, M = DIEValues.size(); j < M; ++j)
-    delete DIEValues[j];
-
-  delete DebugTimer;
+  for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+    DIEBlocks[j]->~DIEBlock();
 }
 
+MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
+  std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
+  if (Entry.first) return Entry.first;
+
+  Entry.second = NextStringPoolNumber++;
+  return Entry.first = Asm->GetTempSymbol("string", Entry.second);
+}
+
+
 /// assignAbbrevNumber - Define a unique number for the abbreviation.
 ///
 void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
@@ -329,8 +356,7 @@
 /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
 /// information entry.
 DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
-  DIEEntry *Value = new DIEEntry(Entry);
-  DIEValues.push_back(Value);
+  DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
   return Value;
 }
 
@@ -339,8 +365,7 @@
 void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
                          unsigned Form, uint64_t Integer) {
   if (!Form) Form = DIEInteger::BestForm(false, Integer);
-  DIEValue *Value = new DIEInteger(Integer);
-  DIEValues.push_back(Value);
+  DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
   Die->addValue(Attribute, Form, Value);
 }
 
@@ -349,8 +374,7 @@
 void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
                          unsigned Form, int64_t Integer) {
   if (!Form) Form = DIEInteger::BestForm(true, Integer);
-  DIEValue *Value = new DIEInteger(Integer);
-  DIEValues.push_back(Value);
+  DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
   Die->addValue(Attribute, Form, Value);
 }
 
@@ -358,54 +382,40 @@
 /// keeps string reference. 
 void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
                            StringRef String) {
-  DIEValue *Value = new DIEString(String);
-  DIEValues.push_back(Value);
+  DIEValue *Value = new (DIEValueAllocator) DIEString(String);
   Die->addValue(Attribute, Form, Value);
 }
 
 /// addLabel - Add a Dwarf label attribute data and value.
 ///
 void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
-                          const DWLabel &Label) {
-  DIEValue *Value = new DIEDwarfLabel(Label);
-  DIEValues.push_back(Value);
-  Die->addValue(Attribute, Form, Value);
-}
-
-/// addObjectLabel - Add an non-Dwarf label attribute data and value.
-///
-void DwarfDebug::addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
-                                const MCSymbol *Sym) {
-  DIEValue *Value = new DIEObjectLabel(Sym);
-  DIEValues.push_back(Value);
-  Die->addValue(Attribute, Form, Value);
-}
-
-/// addSectionOffset - Add a section offset label attribute data and value.
-///
-void DwarfDebug::addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
-                                  const DWLabel &Label, const DWLabel &Section,
-                                  bool isEH, bool useSet) {
-  DIEValue *Value = new DIESectionOffset(Label, Section, isEH, useSet);
-  DIEValues.push_back(Value);
+                          const MCSymbol *Label) {
+  DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
   Die->addValue(Attribute, Form, Value);
 }
 
 /// addDelta - Add a label delta attribute data and value.
 ///
 void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
-                          const DWLabel &Hi, const DWLabel &Lo) {
-  DIEValue *Value = new DIEDelta(Hi, Lo);
-  DIEValues.push_back(Value);
+                          const MCSymbol *Hi, const MCSymbol *Lo) {
+  DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
   Die->addValue(Attribute, Form, Value);
 }
 
+/// addDIEEntry - Add a DIE attribute data and value.
+///
+void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
+                             DIE *Entry) {
+  Die->addValue(Attribute, Form, createDIEEntry(Entry));
+}
+
+
 /// addBlock - Add block data.
 ///
 void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
                           DIEBlock *Block) {
-  Block->ComputeSize(TD);
-  DIEValues.push_back(Block);
+  Block->ComputeSize(Asm);
+  DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
   Die->addValue(Attribute, Block->BestForm(), Block);
 }
 
@@ -413,11 +423,12 @@
 /// entry.
 void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
   // If there is no compile unit specified, don't add a line #.
-  if (V->getCompileUnit().isNull())
+  if (!V->getCompileUnit().Verify())
     return;
 
   unsigned Line = V->getLineNumber();
-  unsigned FileID = findCompileUnit(V->getCompileUnit())->getID();
+  unsigned FileID = GetOrCreateSourceID(V->getContext().getDirectory(),
+                                        V->getContext().getFilename());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -427,11 +438,12 @@
 /// entry.
 void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
   // If there is no compile unit specified, don't add a line #.
-  if (G->getCompileUnit().isNull())
+  if (!G->getCompileUnit().Verify())
     return;
 
   unsigned Line = G->getLineNumber();
-  unsigned FileID = findCompileUnit(G->getCompileUnit())->getID();
+  unsigned FileID = GetOrCreateSourceID(G->getContext().getDirectory(),
+                                        G->getContext().getFilename());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -441,15 +453,17 @@
 /// entry.
 void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
   // If there is no compile unit specified, don't add a line #.
-  if (SP->getCompileUnit().isNull())
+  if (!SP->getCompileUnit().Verify())
     return;
   // If the line number is 0, don't add it.
   if (SP->getLineNumber() == 0)
     return;
 
-
   unsigned Line = SP->getLineNumber();
-  unsigned FileID = findCompileUnit(SP->getCompileUnit())->getID();
+  if (!SP->getContext().Verify())
+    return;
+  unsigned FileID = GetOrCreateSourceID(SP->getDirectory(),
+                                        SP->getFilename());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -460,11 +474,14 @@
 void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
   // If there is no compile unit specified, don't add a line #.
   DICompileUnit CU = Ty->getCompileUnit();
-  if (CU.isNull())
+  if (!CU.Verify())
     return;
 
   unsigned Line = Ty->getLineNumber();
-  unsigned FileID = findCompileUnit(CU)->getID();
+  if (!Ty->getContext().Verify())
+    return;
+  unsigned FileID = GetOrCreateSourceID(Ty->getContext().getDirectory(),
+                                        Ty->getContext().getFilename());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -474,7 +491,7 @@
 /// entry.
 void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
   // If there is no compile unit specified, don't add a line #.
-  if (NS->getCompileUnit().isNull())
+  if (!NS->getCompileUnit().Verify())
     return;
 
   unsigned Line = NS->getLineNumber();
@@ -526,12 +543,8 @@
   }
 
   DICompositeType blockStruct = DICompositeType(subType.getNode());
-
   DIArray Elements = blockStruct.getTypeArray();
 
-  if (Elements.isNull())
-    return Ty;
-
   for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
     DIDescriptor Element = Elements.getElement(i);
     DIDerivedType DT = DIDerivedType(Element.getNode());
@@ -555,8 +568,9 @@
 
   // Decode the original location, and use that as the start of the byref
   // variable's location.
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
-  DIEBlock *Block = new DIEBlock();
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
 
   if (Location.isReg()) {
     if (Reg < 32) {
@@ -677,7 +691,6 @@
   DIDescriptor varField = DIDescriptor();
   DIDescriptor forwardingField = DIDescriptor();
 
-
   for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
     DIDescriptor Element = Fields.getElement(i);
     DIDerivedType DT = DIDerivedType(Element.getNode());
@@ -688,20 +701,17 @@
       varField = Element;
   }
 
-  assert(!varField.isNull() && "Can't find byref variable in Block struct");
-  assert(!forwardingField.isNull()
-         && "Can't find forwarding field in Block struct");
-
   // Get the offsets for the forwarding field and the variable field.
-  unsigned int forwardingFieldOffset =
+  unsigned forwardingFieldOffset =
     DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3;
-  unsigned int varFieldOffset =
+  unsigned varFieldOffset =
     DIDerivedType(varField.getNode()).getOffsetInBits() >> 3;
 
   // Decode the original location, and use that as the start of the byref
   // variable's location.
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
-  DIEBlock *Block = new DIEBlock();
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
 
   if (Location.isReg()) {
     if (Reg < 32)
@@ -755,8 +765,9 @@
 /// provided.
 void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
                             const MachineLocation &Location) {
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
-  DIEBlock *Block = new DIEBlock();
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
 
   if (Location.isReg()) {
     if (Reg < 32) {
@@ -781,9 +792,7 @@
 
 /// addToContextOwner - Add Die into the list of its context owner's children.
 void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
-  if (Context.isNull())
-    ModuleCU->addDie(Die);
-  else if (Context.isType()) {
+  if (Context.isType()) {
     DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode()));
     ContextDIE->addChild(Die);
   } else if (Context.isNameSpace()) {
@@ -820,7 +829,7 @@
 
 /// addType - Add a new type attribute to the specified entity.
 void DwarfDebug::addType(DIE *Entity, DIType Ty) {
-  if (Ty.isNull())
+  if (!Ty.isValid())
     return;
 
   // Check for pre-existence.
@@ -831,14 +840,13 @@
     return;
   }
 
-  // Set up proxy.
-  Entry = createDIEEntry();
-  ModuleCU->insertDIEEntry(Ty.getNode(), Entry);
-
   // Construct type.
   DIE *Buffer = getOrCreateTypeDIE(Ty);
 
-  Entry->setEntry(Buffer);
+  // Set up proxy.
+  Entry = createDIEEntry(Buffer);
+  ModuleCU->insertDIEEntry(Ty.getNode(), Entry);
+
   Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
 }
 
@@ -906,9 +914,9 @@
     // Add enumerators to enumeration type.
     for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
       DIE *ElemDie = NULL;
-      DIEnumerator Enum(Elements.getElement(i).getNode());
-      if (!Enum.isNull()) {
-        ElemDie = constructEnumTypeDIE(&Enum);
+      DIDescriptor Enum(Elements.getElement(i).getNode());
+      if (Enum.isEnumerator()) {
+        ElemDie = constructEnumTypeDIE(DIEnumerator(Enum.getNode()));
         Buffer.addChild(ElemDie);
       }
     }
@@ -939,18 +947,17 @@
     DIArray Elements = CTy.getTypeArray();
 
     // A forward struct declared type may not have elements available.
-    if (Elements.isNull())
+    unsigned N = Elements.getNumElements();
+    if (N == 0)
       break;
 
     // Add elements to structure type.
-    for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+    for (unsigned i = 0; i < N; ++i) {
       DIDescriptor Element = Elements.getElement(i);
-      if (Element.isNull())
-        continue;
       DIE *ElemDie = NULL;
-      if (Element.getTag() == dwarf::DW_TAG_subprogram)
+      if (Element.isSubprogram())
         ElemDie = createSubprogramDIE(DISubprogram(Element.getNode()));
-      else if (Element.getTag() == dwarf::DW_TAG_auto_variable) {
+      else if (Element.isVariable()) {
         DIVariable DV(Element.getNode());
         ElemDie = new DIE(dwarf::DW_TAG_variable);
         addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
@@ -959,8 +966,10 @@
         addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
         addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
         addSourceLine(ElemDie, &DV);
-      } else
+      } else if (Element.isDerivedType())
         ElemDie = createMemberDIE(DIDerivedType(Element.getNode()));
+      else
+        continue;
       Buffer.addChild(ElemDie);
     }
 
@@ -973,7 +982,7 @@
               dwarf::DW_FORM_data1, RLang);
 
     DICompositeType ContainingType = CTy.getContainingType();
-    if (!ContainingType.isNull())
+    if (DIDescriptor(ContainingType.getNode()).isCompositeType())
       addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, 
                   getOrCreateTypeDIE(DIType(ContainingType.getNode())));
     break;
@@ -1051,11 +1060,11 @@
 }
 
 /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) {
+DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) {
   DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
-  StringRef Name = ETy->getName();
+  StringRef Name = ETy.getName();
   addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-  int64_t Value = ETy->getEnumValue();
+  int64_t Value = ETy.getEnumValue();
   addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
   return Enumerator;
 }
@@ -1105,7 +1114,7 @@
 
   addSourceLine(MemberDie, &DT);
 
-  DIEBlock *MemLocationDie = new DIEBlock();
+  DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
   addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
 
   uint64_t Size = DT.getSizeInBits();
@@ -1123,7 +1132,8 @@
     Offset -= FieldOffset;
 
     // Maybe we need to work from the other end.
-    if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
+    if (Asm->getTargetData().isLittleEndian())
+      Offset = FieldSize - (Offset + Size);
     addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
 
     // Here WD_AT_data_member_location points to the anonymous
@@ -1141,7 +1151,7 @@
     // expression to extract appropriate offset from vtable.
     // BaseAddr = ObAddr + *((*ObAddr) - Offset)
 
-    DIEBlock *VBaseLocationDie = new DIEBlock();
+    DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
     addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
     addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
     addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
@@ -1199,7 +1209,7 @@
   DIArray Args = SPTy.getTypeArray();
   unsigned SPTag = SPTy.getTag();
 
-  if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
+  if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
     addType(SPDie, SPTy);
   else
     addType(SPDie, DIType(Args.getElement(0).getNode()));
@@ -1207,7 +1217,7 @@
   unsigned VK = SP.getVirtuality();
   if (VK) {
     addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
-    DIEBlock *Block = new DIEBlock();
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
     addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
     addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
@@ -1240,26 +1250,20 @@
 
   // DW_TAG_inlined_subroutine may refer to this DIE.
   ModuleCU->insertDIE(SP.getNode(), SPDie);
+  
+  if (!DisableFramePointerElim(*Asm->MF))
+    addUInt(SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr, dwarf::DW_FORM_flag, 1);
+
   return SPDie;
 }
 
-/// findCompileUnit - Get the compile unit for the given descriptor.
-///
-CompileUnit *DwarfDebug::findCompileUnit(DICompileUnit Unit) {
-  DenseMap<Value *, CompileUnit *>::const_iterator I =
-    CompileUnitMap.find(Unit.getNode());
-  if (I == CompileUnitMap.end())
-    return constructCompileUnit(Unit.getNode());
-  return I->second;
-}
-
-/// getUpdatedDbgScope - Find or create DbgScope assicated with the instruction.
-/// Initialize scope and update scope hierarchy.
+/// getUpdatedDbgScope - Find DbgScope assicated with the instruction.
+/// Update scope hierarchy. Create abstract scope if required.
 DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
-  MDNode *InlinedAt) {
-  assert (N && "Invalid Scope encoding!");
-  assert (MI && "Missing machine instruction!");
-  bool GetConcreteScope = (MI && InlinedAt);
+                                         MDNode *InlinedAt) {
+  assert(N && "Invalid Scope encoding!");
+  assert(MI && "Missing machine instruction!");
+  bool isAConcreteScope = InlinedAt != 0;
 
   DbgScope *NScope = NULL;
 
@@ -1267,37 +1271,35 @@
     NScope = DbgScopeMap.lookup(InlinedAt);
   else
     NScope = DbgScopeMap.lookup(N);
-  assert (NScope && "Unable to find working scope!");
+  assert(NScope && "Unable to find working scope!");
 
   if (NScope->getFirstInsn())
     return NScope;
 
   DbgScope *Parent = NULL;
-  if (GetConcreteScope) {
+  if (isAConcreteScope) {
     DILocation IL(InlinedAt);
     Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
                          IL.getOrigLocation().getNode());
-    assert (Parent && "Unable to find Parent scope!");
+    assert(Parent && "Unable to find Parent scope!");
     NScope->setParent(Parent);
     Parent->addScope(NScope);
   } else if (DIDescriptor(N).isLexicalBlock()) {
     DILexicalBlock DB(N);
-    if (!DB.getContext().isNull()) {
-      Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt);
-      NScope->setParent(Parent);
-      Parent->addScope(NScope);
-    }
+    Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt);
+    NScope->setParent(Parent);
+    Parent->addScope(NScope);
   }
 
   NScope->setFirstInsn(MI);
 
   if (!Parent && !InlinedAt) {
     StringRef SPName = DISubprogram(N).getLinkageName();
-    if (SPName == MF->getFunction()->getName())
+    if (SPName == Asm->MF->getFunction()->getName())
       CurrentFnDbgScope = NScope;
   }
 
-  if (GetConcreteScope) {
+  if (isAConcreteScope) {
     ConcreteScopes[InlinedAt] = NScope;
     getOrCreateAbstractScope(N);
   }
@@ -1306,7 +1308,7 @@
 }
 
 DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
-  assert (N && "Invalid Scope encoding!");
+  assert(N && "Invalid Scope encoding!");
 
   DbgScope *AScope = AbstractScopes.lookup(N);
   if (AScope)
@@ -1318,8 +1320,7 @@
   if (Scope.isLexicalBlock()) {
     DILexicalBlock DB(N);
     DIDescriptor ParentDesc = DB.getContext();
-    if (!ParentDesc.isNull())
-      Parent = getOrCreateAbstractScope(ParentDesc.getNode());
+    Parent = getOrCreateAbstractScope(ParentDesc.getNode());
   }
 
   AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
@@ -1333,80 +1334,92 @@
   return AScope;
 }
 
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+static bool isSubprogramContext(MDNode *Context) {
+  if (!Context)
+    return false;
+  DIDescriptor D(Context);
+  if (D.isSubprogram())
+    return true;
+  if (D.isType())
+    return isSubprogramContext(DIType(Context).getContext().getNode());
+  return false;
+}
+
 /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
 /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
 /// If there are global variables in this scope then create and insert
 /// DIEs for these variables.
 DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
+  DIE *SPDie = ModuleCU->getDIE(SPNode);
+  assert(SPDie && "Unable to find subprogram DIE!");
+  DISubprogram SP(SPNode);
+  
+  // There is not any need to generate specification DIE for a function
+  // defined at compile unit level. If a function is defined inside another
+  // function then gdb prefers the definition at top level and but does not
+  // expect specification DIE in parent function. So avoid creating 
+  // specification DIE for a function defined inside a function.
+  if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+      !SP.getContext().isFile() && 
+      !isSubprogramContext(SP.getContext().getNode())) {
+    addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+    
+    // Add arguments. 
+    DICompositeType SPTy = SP.getType();
+    DIArray Args = SPTy.getTypeArray();
+    unsigned SPTag = SPTy.getTag();
+    if (SPTag == dwarf::DW_TAG_subroutine_type)
+      for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+        DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
+        addType(Arg, ATy);
+        if (ATy.isArtificial())
+          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+        SPDie->addChild(Arg);
+      }
+    DIE *SPDeclDie = SPDie;
+    SPDie = new DIE(dwarf::DW_TAG_subprogram);
+    addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, 
+                SPDeclDie);
+    ModuleCU->addDie(SPDie);
+  }
+  
+  addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+           Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
+  addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+           Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+  addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
 
- DIE *SPDie = ModuleCU->getDIE(SPNode);
- assert (SPDie && "Unable to find subprogram DIE!");
- DISubprogram SP(SPNode);
- // There is not any need to generate specification DIE for a function
- // defined at compile unit level. If a function is defined inside another
- // function then gdb prefers the definition at top level and but does not
- // expect specification DIE in parent function. So avoid creating 
- // specification DIE for a function defined inside a function.
- if (SP.isDefinition() && !SP.getContext().isCompileUnit()
-     && !SP.getContext().isSubprogram()) {
-   addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-  // Add arguments. 
-   DICompositeType SPTy = SP.getType();
-   DIArray Args = SPTy.getTypeArray();
-   unsigned SPTag = SPTy.getTag();
-   if (SPTag == dwarf::DW_TAG_subroutine_type)
-     for (unsigned i = 1, N =  Args.getNumElements(); i < N; ++i) {
-       DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-       DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
-       addType(Arg, ATy);
-       if (ATy.isArtificial())
-         addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
-       SPDie->addChild(Arg);
-     }
-   DIE *SPDeclDie = SPDie;
-   SPDie = new DIE(dwarf::DW_TAG_subprogram);
-   addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, 
-               SPDeclDie);
-   ModuleCU->addDie(SPDie);
- }
-
- addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
-          DWLabel("func_begin", SubprogramCount));
- addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
-          DWLabel("func_end", SubprogramCount));
- MachineLocation Location(RI->getFrameRegister(*MF));
- addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
-
- if (!DISubprogram(SPNode).isLocalToUnit())
-   addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-
- return SPDie;
+  if (!DISubprogram(SPNode).isLocalToUnit())
+    addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+  
+  return SPDie;
 }
 
 /// constructLexicalScope - Construct new DW_TAG_lexical_block
 /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
 DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
-  unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
-  unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+  
+  MCSymbol *Start = InsnBeforeLabelMap.lookup(Scope->getFirstInsn());
+  MCSymbol *End = InsnAfterLabelMap.lookup(Scope->getLastInsn());
+  if (Start == 0 || End == 0) return 0;
 
-  // Ignore empty scopes.
-  if (StartID == EndID && StartID != 0)
-    return NULL;
-
+  assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
+  assert(End->isDefined() && "Invalid end label for an inlined scope!");
+  
   DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
   if (Scope->isAbstractScope())
     return ScopeDIE;
 
   addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
-           StartID ?
-             DWLabel("label", StartID)
-           : DWLabel("func_begin", SubprogramCount));
+           Start ? Start : Asm->GetTempSymbol("func_begin",
+                                              Asm->getFunctionNumber()));
   addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
-           EndID ?
-             DWLabel("label", EndID)
-           : DWLabel("func_end", SubprogramCount));
-
-
+           End ? End : Asm->GetTempSymbol("func_end",Asm->getFunctionNumber()));
 
   return ScopeDIE;
 }
@@ -1415,29 +1428,27 @@
 /// a function. Construct DIE to represent this concrete inlined copy
 /// of the function.
 DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
-  unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
-  unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
-  assert (StartID && "Invalid starting label for an inlined scope!");
-  assert (EndID && "Invalid end label for an inlined scope!");
-  // Ignore empty scopes.
-  if (StartID == EndID && StartID != 0)
+  MCSymbol *StartLabel = InsnBeforeLabelMap.lookup(Scope->getFirstInsn());
+  MCSymbol *EndLabel = InsnAfterLabelMap.lookup(Scope->getLastInsn());
+  if (StartLabel == 0 || EndLabel == 0) return 0;
+  
+  assert(StartLabel->isDefined() &&
+         "Invalid starting label for an inlined scope!");
+  assert(EndLabel->isDefined() &&
+         "Invalid end label for an inlined scope!");
+  if (!Scope->getScopeNode())
     return NULL;
-
   DIScope DS(Scope->getScopeNode());
-  if (DS.isNull())
-    return NULL;
   DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
 
   DISubprogram InlinedSP = getDISubprogram(DS.getNode());
   DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode());
-  assert (OriginDIE && "Unable to find Origin DIE!");
+  assert(OriginDIE && "Unable to find Origin DIE!");
   addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
               dwarf::DW_FORM_ref4, OriginDIE);
 
-  addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
-           DWLabel("label", StartID));
-  addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
-           DWLabel("label", EndID));
+  addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel);
+  addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel);
 
   InlinedSubprogramDIEs.insert(OriginDIE);
 
@@ -1446,14 +1457,11 @@
     I = InlineInfo.find(InlinedSP.getNode());
 
   if (I == InlineInfo.end()) {
-    InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID,
+    InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartLabel,
                                                              ScopeDIE));
     InlinedSPNodes.push_back(InlinedSP.getNode());
   } else
-    I->second.push_back(std::make_pair(StartID, ScopeDIE));
-
-  StringPool.insert(InlinedSP.getName());
-  StringPool.insert(getRealLinkageName(InlinedSP.getLinkageName()));
+    I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
 
   DILocation DL(Scope->getInlinedAt());
   addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
@@ -1499,9 +1507,9 @@
     DISubprogram InlinedSP = getDISubprogram(DS.getNode());
     DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode());
     (void) OriginSPDIE;
-    assert (OriginSPDIE && "Unable to find Origin DIE for the SP!");
+    assert(OriginSPDIE && "Unable to find Origin DIE for the SP!");
     DIE *AbsDIE = DV->getAbstractVariable()->getDIE();
-    assert (AbsDIE && "Unable to find Origin DIE for the Variable!");
+    assert(AbsDIE && "Unable to find Origin DIE for the Variable!");
     addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
                 dwarf::DW_FORM_ref4, AbsDIE);
   }
@@ -1520,17 +1528,74 @@
 
   // Add variable address.
   if (!Scope->isAbstractScope()) {
-    MachineLocation Location;
-    unsigned FrameReg;
-    int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
-    Location.set(FrameReg, Offset);
+    // Check if variable is described by DBG_VALUE instruction.
+    if (const MachineInstr *DbgValueInsn = DV->getDbgValue()) {
+      if (DbgValueInsn->getNumOperands() == 3) {
+        // FIXME : Handle getNumOperands != 3 
+        if (DbgValueInsn->getOperand(0).getType() 
+            == MachineOperand::MO_Register
+            && DbgValueInsn->getOperand(0).getReg()) {
+          MachineLocation Location;
+          Location.set(DbgValueInsn->getOperand(0).getReg());
+          addAddress(VariableDie, dwarf::DW_AT_location, Location);
+          if (MCSymbol *VS = DV->getDbgValueLabel())
+            addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
+                     VS);
+        } else if (DbgValueInsn->getOperand(0).getType() == 
+                   MachineOperand::MO_Immediate) {
+          DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+          unsigned Imm = DbgValueInsn->getOperand(0).getImm();
+          addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
+          addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block);
+          if (MCSymbol *VS = DV->getDbgValueLabel())
+            addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
+                     VS);
+        } else if (DbgValueInsn->getOperand(0).getType() == 
+                   MachineOperand::MO_FPImmediate) {
+          DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+          APFloat FPImm = DbgValueInsn->getOperand(0).getFPImm()->getValueAPF();
 
-    if (VD.hasComplexAddress())
-      addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
-    else if (VD.isBlockByrefVariable())
-      addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
-    else
-      addAddress(VariableDie, dwarf::DW_AT_location, Location);
+          // Get the raw data form of the floating point.
+          const APInt FltVal = FPImm.bitcastToAPInt();
+          const char *FltPtr = (const char*)FltVal.getRawData();
+
+          int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+          bool LittleEndian = Asm->getTargetData().isLittleEndian();
+          int Incr = (LittleEndian ? 1 : -1);
+          int Start = (LittleEndian ? 0 : NumBytes - 1);
+          int Stop = (LittleEndian ? NumBytes : -1);
+
+          // Output the constant to DWARF one byte at a time.
+          for (; Start != Stop; Start += Incr)
+            addUInt(Block, 0, dwarf::DW_FORM_data1,
+                    (unsigned char)0xFF & FltPtr[Start]);
+
+          addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block);
+
+          if (MCSymbol *VS = DV->getDbgValueLabel())
+            addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
+                     VS);
+        } else {
+          //FIXME : Handle other operand types.
+          delete VariableDie;
+          return NULL;
+        }
+      } 
+    } else {
+      MachineLocation Location;
+      unsigned FrameReg;
+      const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+      int Offset = RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(),
+                                              FrameReg);
+      Location.set(FrameReg, Offset);
+      
+      if (VD.hasComplexAddress())
+        addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+      else if (VD.isBlockByrefVariable())
+        addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+      else
+        addAddress(VariableDie, dwarf::DW_AT_location, Location);
+    }
   }
 
   if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
@@ -1547,15 +1612,13 @@
     return;
 
   DIArray Args = SPTy.getTypeArray();
-  if (Args.isNull()) 
-    return;
-
   for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
     DIType ATy(Args.getElement(i).getNode());
-    if (ATy.isNull())
+    if (!ATy.isValid())
       continue;
     DICompositeType CATy = getDICompositeType(ATy);
-    if (!CATy.isNull() && !CATy.getName().empty()) {
+    if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty()
+        && !CATy.isForwardDecl()) {
       if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode()))
         ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry());
     }
@@ -1564,28 +1627,25 @@
 
 /// constructScopeDIE - Construct a DIE for this scope.
 DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
- if (!Scope)
-  return NULL;
- DIScope DS(Scope->getScopeNode());
- if (DS.isNull())
-   return NULL;
-
- DIE *ScopeDIE = NULL;
- if (Scope->getInlinedAt())
-   ScopeDIE = constructInlinedScopeDIE(Scope);
- else if (DS.isSubprogram()) {
-   if (Scope->isAbstractScope())
-     ScopeDIE = ModuleCU->getDIE(DS.getNode());
-   else
-     ScopeDIE = updateSubprogramScopeDIE(DS.getNode());
- }
- else {
-   ScopeDIE = constructLexicalScopeDIE(Scope);
-   if (!ScopeDIE) return NULL;
- }
-
+  if (!Scope || !Scope->getScopeNode())
+    return NULL;
+ 
+  DIScope DS(Scope->getScopeNode());
+  DIE *ScopeDIE = NULL;
+  if (Scope->getInlinedAt())
+    ScopeDIE = constructInlinedScopeDIE(Scope);
+  else if (DS.isSubprogram()) {
+    if (Scope->isAbstractScope())
+      ScopeDIE = ModuleCU->getDIE(DS.getNode());
+    else
+      ScopeDIE = updateSubprogramScopeDIE(DS.getNode());
+  }
+  else
+    ScopeDIE = constructLexicalScopeDIE(Scope);
+  if (!ScopeDIE) return NULL;
+  
   // Add variables to scope.
-  SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+  const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
   for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
     DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
     if (VariableDIE)
@@ -1593,7 +1653,7 @@
   }
 
   // Add nested scopes.
-  SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+  const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
   for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
     // Define the Scope debug information entry.
     DIE *NestedDIE = constructScopeDIE(Scopes[j]);
@@ -1611,7 +1671,7 @@
 /// source file names. If none currently exists, create a new id and insert it
 /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
 /// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName) {
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
   unsigned DId;
   StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
   if (DI != DirectoryIdMap.end()) {
@@ -1658,21 +1718,29 @@
   return NDie;
 }
 
-CompileUnit *DwarfDebug::constructCompileUnit(MDNode *N) {
+void DwarfDebug::constructCompileUnit(MDNode *N) {
   DICompileUnit DIUnit(N);
+  // Use first compile unit marked as isMain as the compile unit for this
+  // module.
+  if (ModuleCU || !DIUnit.isMain())
+    return;
   StringRef FN = DIUnit.getFilename();
   StringRef Dir = DIUnit.getDirectory();
   unsigned ID = GetOrCreateSourceID(Dir, FN);
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
-  addSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                   DWLabel("section_line", 0), DWLabel("section_line", 0),
-                   false);
   addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
             DIUnit.getProducer());
   addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
           DIUnit.getLanguage());
   addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+  // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
+  // simplifies debug range entries.
+  addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0);
+  // DW_AT_stmt_list is a offset of line number information for this
+  // compile unit in debug_line section. It is always zero when only one
+  // compile unit is emitted in one object file.
+  addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
 
   if (!Dir.empty())
     addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
@@ -1688,16 +1756,9 @@
     addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
             dwarf::DW_FORM_data1, RVer);
 
-  CompileUnit *Unit = new CompileUnit(ID, Die);
-  if (!ModuleCU && DIUnit.isMain()) {
-    // Use first compile unit marked as isMain as the compile unit
-    // for this module.
-    ModuleCU = Unit;
-  }
-
-  CompileUnitMap[DIUnit.getNode()] = Unit;
-  CompileUnits.push_back(Unit);
-  return Unit;
+  assert(!ModuleCU &&
+         "ModuleCU assigned since the top of constructCompileUnit");
+  ModuleCU = new CompileUnit(ID, Die);
 }
 
 void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
@@ -1722,24 +1783,25 @@
   DIDescriptor GVContext = DI_GV.getContext();
   // Do not create specification DIE if context is either compile unit
   // or a subprogram.
-  if (DI_GV.isDefinition() && !GVContext.isCompileUnit()
-      && !GVContext.isSubprogram()) {
+  if (DI_GV.isDefinition() && !GVContext.isCompileUnit() &&
+      !GVContext.isFile() && 
+      !isSubprogramContext(GVContext.getNode())) {
     // Create specification DIE.
     DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
     addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
                 dwarf::DW_FORM_ref4, VariableDie);
-    DIEBlock *Block = new DIEBlock();
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
-    addObjectLabel(Block, 0, dwarf::DW_FORM_udata,
-                   Asm->GetGlobalValueSymbol(DI_GV.getGlobal()));
+    addLabel(Block, 0, dwarf::DW_FORM_udata,
+             Asm->Mang->getSymbol(DI_GV.getGlobal()));
     addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
     addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
     ModuleCU->addDie(VariableSpecDIE);
   } else {
-    DIEBlock *Block = new DIEBlock();
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
-    addObjectLabel(Block, 0, dwarf::DW_FORM_udata,
-                   Asm->GetGlobalValueSymbol(DI_GV.getGlobal()));
+    addLabel(Block, 0, dwarf::DW_FORM_udata,
+             Asm->Mang->getSymbol(DI_GV.getGlobal()));
     addBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
   }
   addToContextOwner(VariableDie, GVContext);
@@ -1748,9 +1810,10 @@
   ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
 
   DIType GTy = DI_GV.getType();
-  if (GTy.isCompositeType() && !GTy.getName().empty()) {
+  if (GTy.isCompositeType() && !GTy.getName().empty()
+      && !GTy.isForwardDecl()) {
     DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode());
-    assert (Entry && "Missing global type!");
+    assert(Entry && "Missing global type!");
     ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry());
   }
   return;
@@ -1785,35 +1848,35 @@
 /// beginModule - Emit all Dwarf sections that should come prior to the
 /// content. Create global DIEs and emit initial debug info sections.
 /// This is inovked by the target AsmPrinter.
-void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
-  this->M = M;
-
-  if (TimePassesIsEnabled)
-    DebugTimer->startTimer();
-
-  if (!MAI->doesSupportDebugInformation())
-    return;
-
+void DwarfDebug::beginModule(Module *M) {
   DebugInfoFinder DbgFinder;
   DbgFinder.processModule(*M);
 
+  bool HasDebugInfo = false;
+  
+  // Scan all the compile-units to see if there are any marked as the main unit.
+  // if not, we do not generate debug info.
+  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+       E = DbgFinder.compile_unit_end(); I != E; ++I) {
+    if (DICompileUnit(*I).isMain()) {
+      HasDebugInfo = true;
+      break;
+    }
+  }
+  
+  if (!HasDebugInfo) return;
+
+  // Tell MMI that we have debug info.
+  MMI->setDebugInfoAvailability(true);
+  
+  // Emit initial sections.
+  EmitSectionLabels();
+  
   // Create all the compile unit DIEs.
   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
          E = DbgFinder.compile_unit_end(); I != E; ++I)
     constructCompileUnit(*I);
 
-  if (CompileUnits.empty()) {
-    if (TimePassesIsEnabled)
-      DebugTimer->stopTimer();
-
-    return;
-  }
-
-  // If main compile unit for this module is not seen than randomly
-  // select first compile unit.
-  if (!ModuleCU)
-    ModuleCU = CompileUnits[0];
-
   // Create DIEs for each subprogram.
   for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
          E = DbgFinder.subprogram_end(); I != E; ++I)
@@ -1824,16 +1887,12 @@
          E = DbgFinder.global_variable_end(); I != E; ++I)
     constructGlobalVariableDIE(*I);
 
-  MMI = mmi;
-  shouldEmit = true;
-  MMI->setDebugInfoAvailability(true);
-
   // Prime section data.
   SectionMap.insert(Asm->getObjFileLowering().getTextSection());
 
   // Print out .file directives to specify files for .loc directives. These are
   // printed out early so that they precede any .loc directives.
-  if (MAI->hasDotLocAndDotFile()) {
+  if (Asm->MAI->hasDotLocAndDotFile()) {
     for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
       // Remember source id starts at 1.
       std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
@@ -1847,22 +1906,12 @@
       Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str());
     }
   }
-
-  // Emit initial sections
-  emitInitial();
-
-  if (TimePassesIsEnabled)
-    DebugTimer->stopTimer();
 }
 
 /// endModule - Emit all Dwarf sections that should come after the content.
 ///
 void DwarfDebug::endModule() {
-  if (!ModuleCU)
-    return;
-
-  if (TimePassesIsEnabled)
-    DebugTimer->startTimer();
+  if (!ModuleCU) return;
 
   // Attach DW_AT_inline attribute with inlined subprogram DIEs.
   for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
@@ -1871,11 +1920,6 @@
     addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
   }
 
-  // Insert top level DIEs.
-  for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(),
-         TE = TopLevelDIEsVector.end(); TI != TE; ++TI)
-    ModuleCU->getCUDie()->addChild(*TI);
-
   for (DenseMap<DIE *, MDNode *>::iterator CI = ContainingTypeMap.begin(),
          CE = ContainingTypeMap.end(); CI != CE; ++CI) {
     DIE *SPDie = CI->first;
@@ -1884,20 +1928,18 @@
     DIE *NDie = ModuleCU->getDIE(N);
     if (!NDie) continue;
     addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
-    // FIXME - This is not the correct approach.
-    // addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
   }
 
   // Standard sections final addresses.
   Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
-  EmitLabel("text_end", 0);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
   Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
-  EmitLabel("data_end", 0);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
 
   // End text sections.
   for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
     Asm->OutStreamer.SwitchSection(SectionMap[i]);
-    EmitLabel("section_end", i);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i));
   }
 
   // Emit common frame information.
@@ -1926,9 +1968,6 @@
   // Emit info into a debug pubtypes section.
   emitDebugPubTypes();
 
-  // Emit info into a debug str section.
-  emitDebugStr();
-
   // Emit info into a debug loc section.
   emitDebugLoc();
 
@@ -1944,89 +1983,187 @@
   // Emit inline info.
   emitDebugInlineInfo();
 
-  if (TimePassesIsEnabled)
-    DebugTimer->stopTimer();
+  // Emit info into a debug str section.
+  emitDebugStr();
+  
+  delete ModuleCU;
+  ModuleCU = NULL;  // Reset for the next Module, if any.
 }
 
 /// findAbstractVariable - Find abstract variable, if any, associated with Var.
 DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
                                               unsigned FrameIdx,
-                                              DILocation &ScopeLoc) {
+                                              DebugLoc ScopeLoc) {
 
   DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
   if (AbsDbgVariable)
     return AbsDbgVariable;
 
-  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode());
+  LLVMContext &Ctx = Var.getNode()->getContext();
+  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
   if (!Scope)
     return NULL;
 
-  AbsDbgVariable = new DbgVariable(Var, FrameIdx);
+  AbsDbgVariable = new DbgVariable(Var, FrameIdx,
+                                   NULL /* No more-abstract variable*/);
   Scope->addVariable(AbsDbgVariable);
   AbstractVariables[Var.getNode()] = AbsDbgVariable;
   return AbsDbgVariable;
 }
 
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+/// FIXME : Refactor findAbstractVariable.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+                                              const MachineInstr *MI,
+                                              DebugLoc ScopeLoc) {
+
+  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
+  if (AbsDbgVariable)
+    return AbsDbgVariable;
+
+  LLVMContext &Ctx = Var.getNode()->getContext();
+  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
+  if (!Scope)
+    return NULL;
+
+  AbsDbgVariable = new DbgVariable(Var, MI,
+                                   NULL /* No more-abstract variable*/);
+  Scope->addVariable(AbsDbgVariable);
+  AbstractVariables[Var.getNode()] = AbsDbgVariable;
+  DbgValueStartMap[MI] = AbsDbgVariable;
+  return AbsDbgVariable;
+}
+
 /// collectVariableInfo - Populate DbgScope entries with variables' info.
 void DwarfDebug::collectVariableInfo() {
-  if (!MMI) return;
+  const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
 
   MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
   for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
          VE = VMap.end(); VI != VE; ++VI) {
     MDNode *Var = VI->first;
     if (!Var) continue;
-    DIVariable DV (Var);
-    std::pair< unsigned, MDNode *> VP = VI->second;
-    DILocation ScopeLoc(VP.second);
+    DIVariable DV(Var);
+    const std::pair<unsigned, DebugLoc> &VP = VI->second;
 
-    DbgScope *Scope =
-      ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode());
-    if (!Scope)
-      Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode());
+    DbgScope *Scope = 0;
+    if (MDNode *IA = VP.second.getInlinedAt(Ctx))
+      Scope = ConcreteScopes.lookup(IA);
+    if (Scope == 0)
+      Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
+    
     // If variable scope is not found then skip this variable.
-    if (!Scope)
+    if (Scope == 0)
       continue;
 
-    DbgVariable *RegVar = new DbgVariable(DV, VP.first);
+    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, VP.second);
+    DbgVariable *RegVar = new DbgVariable(DV, VP.first, AbsDbgVariable);
     Scope->addVariable(RegVar);
-    if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first,
-                                                           ScopeLoc))
-      RegVar->setAbstractVariable(AbsDbgVariable);
+  }
+
+  // Collect variable information from DBG_VALUE machine instructions;
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MInsn = II;
+      if (!MInsn->isDebugValue())
+        continue;
+
+      // FIXME : Lift this restriction.
+      if (MInsn->getNumOperands() != 3)
+        continue;
+      DIVariable DV(
+        const_cast<MDNode *>(MInsn->getOperand(MInsn->getNumOperands() - 1)
+                               .getMetadata()));
+      if (DV.getTag() == dwarf::DW_TAG_arg_variable)  {
+        // FIXME Handle inlined subroutine arguments.
+        DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL);
+        CurrentFnDbgScope->addVariable(ArgVar);
+        DbgValueStartMap[MInsn] = ArgVar;
+        continue;
+      }
+
+      DebugLoc DL = MInsn->getDebugLoc();
+      if (DL.isUnknown()) continue;
+      DbgScope *Scope = 0;
+      if (MDNode *IA = DL.getInlinedAt(Ctx))
+        Scope = ConcreteScopes.lookup(IA);
+      if (Scope == 0)
+        Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
+      
+      // If variable scope is not found then skip this variable.
+      if (Scope == 0)
+        continue;
+
+      DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, DL);
+      DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable);
+      DbgValueStartMap[MInsn] = RegVar;
+      Scope->addVariable(RegVar);
+    }
   }
 }
 
-/// beginScope - Process beginning of a scope starting at Label.
-void DwarfDebug::beginScope(const MachineInstr *MI, unsigned Label) {
-  InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
-  if (I == DbgScopeBeginMap.end())
+/// beginScope - Process beginning of a scope.
+void DwarfDebug::beginScope(const MachineInstr *MI) {
+  // Check location.
+  DebugLoc DL = MI->getDebugLoc();
+  if (DL.isUnknown())
     return;
-  ScopeVector &SD = I->second;
-  for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end();
-       SDI != SDE; ++SDI)
-    (*SDI)->setStartLabelID(Label);
+
+  // Check and update last known location info.
+  if (DL == PrevInstLoc)
+    return;
+  
+  MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+  
+  // FIXME: Should only verify each scope once!
+  if (!DIScope(Scope).Verify())
+    return;
+
+  // DBG_VALUE instruction establishes new value.
+  if (MI->isDebugValue()) {
+    DenseMap<const MachineInstr *, DbgVariable *>::iterator DI
+      = DbgValueStartMap.find(MI);
+    if (DI != DbgValueStartMap.end()) {
+      MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+      PrevInstLoc = DL;
+      DI->second->setDbgValueLabel(Label);
+    }
+    return;
+  }
+
+  // Emit a label to indicate location change. This is used for line 
+  // table even if this instruction does start a new scope.
+  MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+  PrevInstLoc = DL;
+
+  // If this instruction begins a scope then note down corresponding label.
+  if (InsnsBeginScopeSet.count(MI) != 0)
+    InsnBeforeLabelMap[MI] = Label;
 }
 
 /// endScope - Process end of a scope.
 void DwarfDebug::endScope(const MachineInstr *MI) {
-  InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
-  if (I == DbgScopeEndMap.end())
+  // Ignore DBG_VALUE instruction.
+  if (MI->isDebugValue())
     return;
 
-  unsigned Label = MMI->NextLabelID();
-  Asm->printLabel(Label);
-  O << '\n';
+  // Check location.
+  DebugLoc DL = MI->getDebugLoc();
+  if (DL.isUnknown())
+    return;
 
-  SmallVector<DbgScope *, 2> &SD = I->second;
-  for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
-       SDI != SDE; ++SDI)
-    (*SDI)->setEndLabelID(Label);
-  return;
+  if (InsnsEndScopeSet.count(MI) != 0) {
+    // Emit a label if this instruction ends a scope.
+    MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(Label);
+    InsnAfterLabelMap[MI] = Label;
+  }
 }
 
 /// createDbgScope - Create DbgScope for the scope.
 void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
-
   if (!InlinedAt) {
     DbgScope *WScope = DbgScopeMap.lookup(Scope);
     if (WScope)
@@ -2049,7 +2186,7 @@
 }
 
 /// extractScopeInformation - Scan machine instructions in this function
-/// and collect DbgScopes. Return true, if atleast one scope was found.
+/// and collect DbgScopes. Return true, if at least one scope was found.
 bool DwarfDebug::extractScopeInformation() {
   // If scope information was extracted using .dbg intrinsics then there is not
   // any need to extract these information by scanning each instruction.
@@ -2058,45 +2195,52 @@
 
   DenseMap<const MachineInstr *, unsigned> MIIndexMap;
   unsigned MIIndex = 0;
+  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  
   // Scan each instruction and create scopes. First build working set of scopes.
-  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
        I != E; ++I) {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
+      // FIXME : Remove DBG_VALUE check.
+      if (MInsn->isDebugValue()) continue;
       MIIndexMap[MInsn] = MIIndex++;
+      
       DebugLoc DL = MInsn->getDebugLoc();
       if (DL.isUnknown()) continue;
-      DILocation DLT = MF->getDILocation(DL);
-      DIScope DLTScope = DLT.getScope();
-      if (DLTScope.isNull()) continue;
+      
+      MDNode *Scope = DL.getScope(Ctx);
+      
       // There is no need to create another DIE for compile unit. For all
       // other scopes, create one DbgScope now. This will be translated
       // into a scope DIE at the end.
-      if (DLTScope.isCompileUnit()) continue;
-      createDbgScope(DLTScope.getNode(), DLT.getOrigLocation().getNode());
+      if (DIScope(Scope).isCompileUnit()) continue;
+      createDbgScope(Scope, DL.getInlinedAt(Ctx));
     }
   }
 
 
   // Build scope hierarchy using working set of scopes.
-  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
        I != E; ++I) {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
+      // FIXME : Remove DBG_VALUE check.
+      if (MInsn->isDebugValue()) continue;
       DebugLoc DL = MInsn->getDebugLoc();
-      if (DL.isUnknown())  continue;
-      DILocation DLT = MF->getDILocation(DL);
-      DIScope DLTScope = DLT.getScope();
-      if (DLTScope.isNull())  continue;
+      if (DL.isUnknown()) continue;
+
+      MDNode *Scope = DL.getScope(Ctx);
+      if (Scope == 0) continue;
+      
       // There is no need to create another DIE for compile unit. For all
       // other scopes, create one DbgScope now. This will be translated
       // into a scope DIE at the end.
-      if (DLTScope.isCompileUnit()) continue;
-      DbgScope *Scope = getUpdatedDbgScope(DLTScope.getNode(), MInsn, 
-                                           DLT.getOrigLocation().getNode());
-      Scope->setLastInsn(MInsn);
+      if (DIScope(Scope).isCompileUnit()) continue;
+      DbgScope *DScope = getUpdatedDbgScope(Scope, MInsn, DL.getInlinedAt(Ctx));
+      DScope->setLastInsn(MInsn);
     }
   }
 
@@ -2105,6 +2249,15 @@
 
   CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap);
 
+  identifyScopeMarkers();
+
+  return !DbgScopeMap.empty();
+}
+
+/// identifyScopeMarkers() - Indentify instructions that are marking
+/// beginning of or end of a scope.
+void DwarfDebug::identifyScopeMarkers() {
+
   // Each scope has first instruction and last instruction to mark beginning
   // and end of a scope respectively. Create an inverse map that list scopes
   // starts (and ends) with an instruction. One instruction may start (or end)
@@ -2112,90 +2265,80 @@
   SmallVector<DbgScope *, 4> WorkList;
   WorkList.push_back(CurrentFnDbgScope);
   while (!WorkList.empty()) {
-    DbgScope *S = WorkList.back(); WorkList.pop_back();
+    DbgScope *S = WorkList.pop_back_val();
 
-    SmallVector<DbgScope *, 4> &Children = S->getScopes();
+    const SmallVector<DbgScope *, 4> &Children = S->getScopes();
     if (!Children.empty()) 
-      for (SmallVector<DbgScope *, 4>::iterator SI = Children.begin(),
+      for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
              SE = Children.end(); SI != SE; ++SI)
         WorkList.push_back(*SI);
 
     if (S->isAbstractScope())
       continue;
-    const MachineInstr *MI = S->getFirstInsn();
-    assert (MI && "DbgScope does not have first instruction!");
+    assert(S->getFirstInsn() && "DbgScope does not have first instruction!");
+    InsnsBeginScopeSet.insert(S->getFirstInsn());
 
-    InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI);
-    if (IDI != DbgScopeBeginMap.end())
-      IDI->second.push_back(S);
-    else
-      DbgScopeBeginMap[MI].push_back(S);
-
-    MI = S->getLastInsn();
-    assert (MI && "DbgScope does not have last instruction!");
-    IDI = DbgScopeEndMap.find(MI);
-    if (IDI != DbgScopeEndMap.end())
-      IDI->second.push_back(S);
-    else
-      DbgScopeEndMap[MI].push_back(S);
+    assert(S->getLastInsn() && "DbgScope does not have last instruction!");
+    InsnsEndScopeSet.insert(S->getLastInsn());
   }
+}
 
-  return !DbgScopeMap.empty();
+/// FindFirstDebugLoc - Find the first debug location in the function. This
+/// is intended to be an approximation for the source position of the
+/// beginning of the function.
+static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator MBBI = I->begin(), MBBE = I->end();
+         MBBI != MBBE; ++MBBI) {
+      DebugLoc DL = MBBI->getDebugLoc();
+      if (!DL.isUnknown())
+        return DL;
+    }
+  return DebugLoc();
 }
 
 /// beginFunction - Gather pre-function debug information.  Assumes being
 /// emitted immediately after the function entry point.
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
-  this->MF = MF;
-
-  if (!ShouldEmitDwarfDebug()) return;
-
-  if (TimePassesIsEnabled)
-    DebugTimer->startTimer();
-
-  if (!extractScopeInformation())
-    return;
-
+  if (!MMI->hasDebugInfo()) return;
+  if (!extractScopeInformation()) return;
+  
   collectVariableInfo();
 
   // Assumes in correct section after the entry point.
-  EmitLabel("func_begin", ++SubprogramCount);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("func_begin",
+                                                Asm->getFunctionNumber()));
 
   // Emit label for the implicitly defined dbg.stoppoint at the start of the
   // function.
-  DebugLoc FDL = MF->getDefaultDebugLoc();
-  if (!FDL.isUnknown()) {
-    DILocation DLT = MF->getDILocation(FDL);
-    unsigned LabelID = 0;
-    DISubprogram SP = getDISubprogram(DLT.getScope().getNode());
-    if (!SP.isNull())
-      LabelID = recordSourceLine(SP.getLineNumber(), 0, 
-                                 DLT.getScope().getNode());
-    else
-      LabelID = recordSourceLine(DLT.getLineNumber(), 
-                                 DLT.getColumnNumber(), 
-                                 DLT.getScope().getNode());
-    Asm->printLabel(LabelID);
-    O << '\n';
+  DebugLoc FDL = FindFirstDebugLoc(MF);
+  if (FDL.isUnknown()) return;
+  
+  MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
+  
+  DISubprogram SP = getDISubprogram(Scope);
+  unsigned Line, Col;
+  if (SP.Verify()) {
+    Line = SP.getLineNumber();
+    Col = 0;
+  } else {
+    Line = FDL.getLine();
+    Col = FDL.getCol();
   }
-  if (TimePassesIsEnabled)
-    DebugTimer->stopTimer();
+  
+  recordSourceLine(Line, Col, Scope);
 }
 
 /// endFunction - Gather and emit post-function debug information.
 ///
 void DwarfDebug::endFunction(const MachineFunction *MF) {
-  if (!ShouldEmitDwarfDebug()) return;
-
-  if (TimePassesIsEnabled)
-    DebugTimer->startTimer();
-
-  if (DbgScopeMap.empty())
-    return;
+  if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
 
   if (CurrentFnDbgScope) {
     // Define end label for subprogram.
-    EmitLabel("func_end", SubprogramCount);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("func_end",
+                                                  Asm->getFunctionNumber()));
     
     // Get function line info.
     if (!Lines.empty()) {
@@ -2215,34 +2358,30 @@
     
     constructScopeDIE(CurrentFnDbgScope);
     
-    DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
+    DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
                                                  MMI->getFrameMoves()));
   }
 
   // Clear debug info
   CurrentFnDbgScope = NULL;
-  DbgScopeMap.clear();
-  DbgScopeBeginMap.clear();
-  DbgScopeEndMap.clear();
+  DeleteContainerSeconds(DbgScopeMap);
+  InsnsBeginScopeSet.clear();
+  InsnsEndScopeSet.clear();
+  DbgValueStartMap.clear();
   ConcreteScopes.clear();
+  DeleteContainerSeconds(AbstractScopes);
   AbstractScopesList.clear();
+  AbstractVariables.clear();
+  InsnBeforeLabelMap.clear();
+  InsnAfterLabelMap.clear();
   Lines.clear();
-  
-  if (TimePassesIsEnabled)
-    DebugTimer->stopTimer();
+  PrevLabel = NULL;
 }
 
-/// recordSourceLine - Records location information and associates it with a
-/// label. Returns a unique label ID used to generate a label and provide
-/// correspondence to the source line list.
-unsigned DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
-                                      MDNode *S) {
-  if (!MMI)
-    return 0;
-
-  if (TimePassesIsEnabled)
-    DebugTimer->startTimer();
-
+/// recordSourceLine - Register a source line with debug info. Returns the
+/// unique label that was emitted and which provides correspondence to
+/// the source line list.
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
   StringRef Dir;
   StringRef Fn;
 
@@ -2260,34 +2399,14 @@
     Dir = DB.getDirectory();
     Fn = DB.getFilename();
   } else
-    assert (0 && "Unexpected scope info");
+    assert(0 && "Unexpected scope info");
 
   unsigned Src = GetOrCreateSourceID(Dir, Fn);
-  unsigned ID = MMI->NextLabelID();
-  Lines.push_back(SrcLineInfo(Line, Col, Src, ID));
+  MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+  Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
 
-  if (TimePassesIsEnabled)
-    DebugTimer->stopTimer();
-
-  return ID;
-}
-
-/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
-/// timed. Look up the source id with the given directory and source file
-/// names. If none currently exists, create a new id and insert it in the
-/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
-/// well.
-unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
-                                         const std::string &FileName) {
-  if (TimePassesIsEnabled)
-    DebugTimer->startTimer();
-
-  unsigned SrcId = GetOrCreateSourceID(DirName.c_str(), FileName.c_str());
-
-  if (TimePassesIsEnabled)
-    DebugTimer->stopTimer();
-
-  return SrcId;
+  Asm->OutStreamer.EmitLabel(Label);
+  return Label;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2302,7 +2421,8 @@
   const std::vector<DIE *> &Children = Die->getChildren();
 
   // If not last sibling and has children then add sibling offset attribute.
-  if (!Last && !Children.empty()) Die->addSiblingOffset();
+  if (!Last && !Children.empty())
+    Die->addSiblingOffset(DIEValueAllocator);
 
   // Record the abbreviation.
   assignAbbrevNumber(Die->getAbbrev());
@@ -2323,7 +2443,7 @@
   // Size the DIE attribute values.
   for (unsigned i = 0, N = Values.size(); i < N; ++i)
     // Size attribute value.
-    Offset += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+    Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
 
   // Size the DIE children if any.
   if (!Children.empty()) {
@@ -2352,53 +2472,51 @@
     sizeof(int8_t);   // Pointer Size (in bytes)
 
   computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true);
-  CompileUnitOffsets[ModuleCU] = 0;
 }
 
-/// emitInitial - Emit initial Dwarf declarations.  This is necessary for cc
-/// tools to recognize the object file contains Dwarf information.
-void DwarfDebug::emitInitial() {
-  // Check to see if we already emitted intial headers.
-  if (didInitial) return;
-  didInitial = true;
+/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// temporary label to it if SymbolStem is specified.
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+                                const char *SymbolStem = 0) {
+  Asm->OutStreamer.SwitchSection(Section);
+  if (!SymbolStem) return 0;
+  
+  MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+  Asm->OutStreamer.EmitLabel(TmpSym);
+  return TmpSym;
+}
 
+/// EmitSectionLabels - Emit initial Dwarf sections with a label at
+/// the start of each one.
+void DwarfDebug::EmitSectionLabels() {
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
 
   // Dwarf sections base addresses.
-  if (MAI->doesDwarfRequireFrameSection()) {
-    Asm->OutStreamer.SwitchSection(TLOF.getDwarfFrameSection());
-    EmitLabel("section_debug_frame", 0);
-  }
+  if (Asm->MAI->doesDwarfRequireFrameSection()) {
+    DwarfFrameSectionSym =
+      EmitSectionSym(Asm, TLOF.getDwarfFrameSection(), "section_debug_frame");
+   }
 
-  Asm->OutStreamer.SwitchSection(TLOF.getDwarfInfoSection());
-  EmitLabel("section_info", 0);
-  Asm->OutStreamer.SwitchSection(TLOF.getDwarfAbbrevSection());
-  EmitLabel("section_abbrev", 0);
-  Asm->OutStreamer.SwitchSection(TLOF.getDwarfARangesSection());
-  EmitLabel("section_aranges", 0);
+  DwarfInfoSectionSym = 
+    EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+  DwarfAbbrevSectionSym = 
+    EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+  EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
+  
+  if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
+    EmitSectionSym(Asm, MacroInfo);
 
-  if (const MCSection *LineInfoDirective = TLOF.getDwarfMacroInfoSection()) {
-    Asm->OutStreamer.SwitchSection(LineInfoDirective);
-    EmitLabel("section_macinfo", 0);
-  }
+  EmitSectionSym(Asm, TLOF.getDwarfLineSection());
+  EmitSectionSym(Asm, TLOF.getDwarfLocSection());
+  EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
+  EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+  DwarfStrSectionSym = 
+    EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
+  DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+                                             "debug_range");
 
-  Asm->OutStreamer.SwitchSection(TLOF.getDwarfLineSection());
-  EmitLabel("section_line", 0);
-  Asm->OutStreamer.SwitchSection(TLOF.getDwarfLocSection());
-  EmitLabel("section_loc", 0);
-  Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubNamesSection());
-  EmitLabel("section_pubnames", 0);
-  Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubTypesSection());
-  EmitLabel("section_pubtypes", 0);
-  Asm->OutStreamer.SwitchSection(TLOF.getDwarfStrSection());
-  EmitLabel("section_str", 0);
-  Asm->OutStreamer.SwitchSection(TLOF.getDwarfRangesSection());
-  EmitLabel("section_ranges", 0);
-
-  Asm->OutStreamer.SwitchSection(TLOF.getTextSection());
-  EmitLabel("text_begin", 0);
-  Asm->OutStreamer.SwitchSection(TLOF.getDataSection());
-  EmitLabel("data_begin", 0);
+  TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+  EmitSectionSym(Asm, TLOF.getDataSection());
 }
 
 /// emitDIE - Recusively Emits a debug information entry.
@@ -2408,17 +2526,15 @@
   unsigned AbbrevNumber = Die->getAbbrevNumber();
   const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
 
-  Asm->O << '\n';
-
   // Emit the code (index) for the abbreviation.
-  if (Asm->VerboseAsm)
+  if (Asm->isVerbose())
     Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
                                 Twine::utohexstr(Die->getOffset()) + ":0x" +
                                 Twine::utohexstr(Die->getSize()) + " " +
                                 dwarf::TagString(Abbrev->getTag()));
-  EmitULEB128(AbbrevNumber);
+  Asm->EmitULEB128(AbbrevNumber);
 
-  SmallVector<DIEValue*, 32> &Values = Die->getValues();
+  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
   const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
 
   // Emit the DIE attribute values.
@@ -2427,7 +2543,7 @@
     unsigned Form = AbbrevData[i].getForm();
     assert(Form && "Too many attributes for DIE (check abbreviation)");
 
-    if (Asm->VerboseAsm)
+    if (Asm->isVerbose())
       Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
     
     switch (Attr) {
@@ -2441,10 +2557,18 @@
       Asm->EmitInt32(Addr);
       break;
     }
+    case dwarf::DW_AT_ranges: {
+      // DW_AT_range Value encodes offset in debug_range section.
+      DIEInteger *V = cast<DIEInteger>(Values[i]);
+      Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+                                     V->getValue(),
+                                     DwarfDebugRangeSectionSym,
+                                     4);
+      break;
+    }
     default:
       // Emit an attribute using the defined form.
-      Values[i]->EmitValue(this, Form);
-      O << "\n"; // REMOVE This once all EmitValue impls emit their own newline.
+      Values[i]->EmitValue(Asm, Form);
       break;
     }
   }
@@ -2456,7 +2580,9 @@
     for (unsigned j = 0, M = Children.size(); j < M; ++j)
       emitDIE(Children[j]);
 
-    Asm->EmitInt8(0); EOL("End Of Children Mark");
+    if (Asm->isVerbose())
+      Asm->OutStreamer.AddComment("End Of Children Mark");
+    Asm->EmitInt8(0);
   }
 }
 
@@ -2469,7 +2595,8 @@
   DIE *Die = ModuleCU->getCUDie();
 
   // Emit the compile units header.
-  EmitLabel("info_begin", ModuleCU->getID());
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
+                                                ModuleCU->getID()));
 
   // Emit size of content not including length itself
   unsigned ContentSize = Die->getSize() +
@@ -2478,20 +2605,24 @@
     sizeof(int8_t) +  // Pointer Size (in bytes)
     sizeof(int32_t);  // FIXME - extra pad for gdb bug.
 
-  Asm->EmitInt32(ContentSize);  EOL("Length of Compilation Unit Info");
-  Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("DWARF version number");
-  EmitSectionOffset("abbrev_begin", "section_abbrev", 0, 0, true, false);
-  EOL("Offset Into Abbrev. Section");
-  Asm->EmitInt8(TD->getPointerSize()); EOL("Address Size (in bytes)");
+  Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+  Asm->EmitInt32(ContentSize);
+  Asm->OutStreamer.AddComment("DWARF version number");
+  Asm->EmitInt16(dwarf::DWARF_VERSION);
+  Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+  Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
+                         DwarfAbbrevSectionSym);
+  Asm->OutStreamer.AddComment("Address Size (in bytes)");
+  Asm->EmitInt8(Asm->getTargetData().getPointerSize());
 
   emitDIE(Die);
   // FIXME - extra padding for gdb bug.
-  Asm->EmitInt8(0); EOL("Extra Pad For GDB");
-  Asm->EmitInt8(0); EOL("Extra Pad For GDB");
-  Asm->EmitInt8(0); EOL("Extra Pad For GDB");
-  Asm->EmitInt8(0); EOL("Extra Pad For GDB");
-  EmitLabel("info_end", ModuleCU->getID());
-  Asm->O << '\n';
+  Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
+  Asm->EmitInt8(0);
+  Asm->EmitInt8(0);
+  Asm->EmitInt8(0);
+  Asm->EmitInt8(0);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", ModuleCU->getID()));
 }
 
 /// emitAbbreviations - Emit the abbreviation section.
@@ -2503,7 +2634,7 @@
     Asm->OutStreamer.SwitchSection(
                             Asm->getObjFileLowering().getDwarfAbbrevSection());
 
-    EmitLabel("abbrev_begin", 0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_begin"));
 
     // For each abbrevation.
     for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
@@ -2511,18 +2642,16 @@
       const DIEAbbrev *Abbrev = Abbreviations[i];
 
       // Emit the abbrevations code (base 1 index.)
-      EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+      Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
 
       // Emit the abbreviations data.
-      Abbrev->Emit(this);
-      Asm->O << '\n';
+      Abbrev->Emit(Asm);
     }
 
     // Mark end of abbreviations.
-    EmitULEB128(0, "EOM(3)");
+    Asm->EmitULEB128(0, "EOM(3)");
 
-    EmitLabel("abbrev_end", 0);
-    Asm->O << '\n';
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_end"));
   }
 }
 
@@ -2531,13 +2660,23 @@
 ///
 void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
   // Define last address of section.
-  Asm->EmitInt8(0); EOL("Extended Op");
-  Asm->EmitInt8(TD->getPointerSize() + 1); EOL("Op size");
-  Asm->EmitInt8(dwarf::DW_LNE_set_address); EOL("DW_LNE_set_address");
-  EmitReference("section_end", SectionEnd); EOL("Section end label");
+  Asm->OutStreamer.AddComment("Extended Op");
+  Asm->EmitInt8(0);
+  
+  Asm->OutStreamer.AddComment("Op size");
+  Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
+  Asm->OutStreamer.AddComment("DW_LNE_set_address");
+  Asm->EmitInt8(dwarf::DW_LNE_set_address);
+
+  Asm->OutStreamer.AddComment("Section end label");
+
+  Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
+                                   Asm->getTargetData().getPointerSize(),
+                                   0/*AddrSpace*/);
 
   // Mark end of matrix.
-  Asm->EmitInt8(0); EOL("DW_LNE_end_sequence");
+  Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
+  Asm->EmitInt8(0);
   Asm->EmitInt8(1);
   Asm->EmitInt8(1);
 }
@@ -2547,7 +2686,7 @@
 void DwarfDebug::emitDebugLines() {
   // If the target is using .loc/.file, the assembler will be emitting the
   // .debug_line table automatically.
-  if (MAI->hasDotLocAndDotFile())
+  if (Asm->MAI->hasDotLocAndDotFile())
     return;
 
   // Minimum line delta, thus ranging from -10..(255-10).
@@ -2560,58 +2699,77 @@
                             Asm->getObjFileLowering().getDwarfLineSection());
 
   // Construct the section header.
-  EmitDifference("line_end", 0, "line_begin", 0, true);
-  EOL("Length of Source Line Info");
-  EmitLabel("line_begin", 0);
+  Asm->OutStreamer.AddComment("Length of Source Line Info");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
+                           Asm->GetTempSymbol("line_begin"), 4);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin"));
 
-  Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("DWARF version number");
+  Asm->OutStreamer.AddComment("DWARF version number");
+  Asm->EmitInt16(dwarf::DWARF_VERSION); 
 
-  EmitDifference("line_prolog_end", 0, "line_prolog_begin", 0, true);
-  EOL("Prolog Length");
-  EmitLabel("line_prolog_begin", 0);
+  Asm->OutStreamer.AddComment("Prolog Length");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"),
+                           Asm->GetTempSymbol("line_prolog_begin"), 4);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_begin"));
 
-  Asm->EmitInt8(1); EOL("Minimum Instruction Length");
-  Asm->EmitInt8(1); EOL("Default is_stmt_start flag");
-  Asm->EmitInt8(MinLineDelta); EOL("Line Base Value (Special Opcodes)");
-  Asm->EmitInt8(MaxLineDelta); EOL("Line Range Value (Special Opcodes)");
-  Asm->EmitInt8(-MinLineDelta); EOL("Special Opcode Base");
+  Asm->OutStreamer.AddComment("Minimum Instruction Length");
+  Asm->EmitInt8(1);
+  Asm->OutStreamer.AddComment("Default is_stmt_start flag");
+  Asm->EmitInt8(1);
+  Asm->OutStreamer.AddComment("Line Base Value (Special Opcodes)");
+  Asm->EmitInt8(MinLineDelta);
+  Asm->OutStreamer.AddComment("Line Range Value (Special Opcodes)");
+  Asm->EmitInt8(MaxLineDelta);
+  Asm->OutStreamer.AddComment("Special Opcode Base");
+  Asm->EmitInt8(-MinLineDelta);
 
   // Line number standard opcode encodings argument count
-  Asm->EmitInt8(0); EOL("DW_LNS_copy arg count");
-  Asm->EmitInt8(1); EOL("DW_LNS_advance_pc arg count");
-  Asm->EmitInt8(1); EOL("DW_LNS_advance_line arg count");
-  Asm->EmitInt8(1); EOL("DW_LNS_set_file arg count");
-  Asm->EmitInt8(1); EOL("DW_LNS_set_column arg count");
-  Asm->EmitInt8(0); EOL("DW_LNS_negate_stmt arg count");
-  Asm->EmitInt8(0); EOL("DW_LNS_set_basic_block arg count");
-  Asm->EmitInt8(0); EOL("DW_LNS_const_add_pc arg count");
-  Asm->EmitInt8(1); EOL("DW_LNS_fixed_advance_pc arg count");
+  Asm->OutStreamer.AddComment("DW_LNS_copy arg count");
+  Asm->EmitInt8(0);
+  Asm->OutStreamer.AddComment("DW_LNS_advance_pc arg count");
+  Asm->EmitInt8(1);
+  Asm->OutStreamer.AddComment("DW_LNS_advance_line arg count");
+  Asm->EmitInt8(1);
+  Asm->OutStreamer.AddComment("DW_LNS_set_file arg count");
+  Asm->EmitInt8(1);
+  Asm->OutStreamer.AddComment("DW_LNS_set_column arg count");
+  Asm->EmitInt8(1);
+  Asm->OutStreamer.AddComment("DW_LNS_negate_stmt arg count");
+  Asm->EmitInt8(0);
+  Asm->OutStreamer.AddComment("DW_LNS_set_basic_block arg count");
+  Asm->EmitInt8(0);
+  Asm->OutStreamer.AddComment("DW_LNS_const_add_pc arg count");
+  Asm->EmitInt8(0);
+  Asm->OutStreamer.AddComment("DW_LNS_fixed_advance_pc arg count");
+  Asm->EmitInt8(1);
 
   // Emit directories.
   for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) {
     const std::string &Dir = getSourceDirectoryName(DI);
-    if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("Directory");
+    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Directory");
     Asm->OutStreamer.EmitBytes(StringRef(Dir.c_str(), Dir.size()+1), 0);
   }
 
-  Asm->EmitInt8(0); EOL("End of directories");
+  Asm->OutStreamer.AddComment("End of directories");
+  Asm->EmitInt8(0);
 
   // Emit files.
   for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) {
     // Remember source id starts at 1.
     std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI);
     const std::string &FN = getSourceFileName(Id.second);
-    if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("Source");
+    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source");
     Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0);
     
-    EmitULEB128(Id.first, "Directory #");
-    EmitULEB128(0, "Mod date");
-    EmitULEB128(0, "File size");
+    Asm->EmitULEB128(Id.first, "Directory #");
+    Asm->EmitULEB128(0, "Mod date");
+    Asm->EmitULEB128(0, "File size");
   }
 
-  Asm->EmitInt8(0); EOL("End of files");
+  Asm->OutStreamer.AddComment("End of files");
+  Asm->EmitInt8(0);
 
-  EmitLabel("line_prolog_end", 0);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_end"));
 
   // A sequence for each text section.
   unsigned SecSrcLinesSize = SectionSourceLines.size();
@@ -2620,13 +2778,6 @@
     // Isolate current sections line info.
     const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j];
 
-    /*if (Asm->isVerbose()) {
-      const MCSection *S = SectionMap[j + 1];
-      O << '\t' << MAI->getCommentString() << " Section"
-        << S->getName() << '\n';
-    }*/
-    Asm->O << '\n';
-
     // Dwarf assumes we start with first line of first source file.
     unsigned Source = 1;
     unsigned Line = 1;
@@ -2634,33 +2785,40 @@
     // Construct rows of the address, source, line, column matrix.
     for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
       const SrcLineInfo &LineInfo = LineInfos[i];
-      unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID());
-      if (!LabelID) continue;
+      MCSymbol *Label = LineInfo.getLabel();
+      if (!Label->isDefined()) continue; // Not emitted, in dead code.
 
       if (LineInfo.getLine() == 0) continue;
 
-      if (!Asm->isVerbose())
-        Asm->O << '\n';
-      else {
-        std::pair<unsigned, unsigned> SourceID =
+      if (Asm->isVerbose()) {
+        std::pair<unsigned, unsigned> SrcID =
           getSourceDirectoryAndFileIds(LineInfo.getSourceID());
-        O << '\t' << MAI->getCommentString() << ' '
-          << getSourceDirectoryName(SourceID.first) << '/'
-          << getSourceFileName(SourceID.second)
-          << ':' << utostr_32(LineInfo.getLine()) << '\n';
+        Asm->OutStreamer.AddComment(Twine(getSourceDirectoryName(SrcID.first)) +
+                                    "/" +
+                                    Twine(getSourceFileName(SrcID.second)) +
+                                    ":" + Twine(LineInfo.getLine()));
       }
 
       // Define the line address.
-      Asm->EmitInt8(0); EOL("Extended Op");
-      Asm->EmitInt8(TD->getPointerSize() + 1); EOL("Op size");
-      Asm->EmitInt8(dwarf::DW_LNE_set_address); EOL("DW_LNE_set_address");
-      EmitReference("label",  LabelID); EOL("Location label");
+      Asm->OutStreamer.AddComment("Extended Op");
+      Asm->EmitInt8(0);
+      Asm->OutStreamer.AddComment("Op size");
+      Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
 
+      Asm->OutStreamer.AddComment("DW_LNE_set_address");
+      Asm->EmitInt8(dwarf::DW_LNE_set_address); 
+
+      Asm->OutStreamer.AddComment("Location label");
+      Asm->OutStreamer.EmitSymbolValue(Label,
+                                       Asm->getTargetData().getPointerSize(),
+                                       0/*AddrSpace*/);
+      
       // If change of source, then switch to the new source.
       if (Source != LineInfo.getSourceID()) {
         Source = LineInfo.getSourceID();
-        Asm->EmitInt8(dwarf::DW_LNS_set_file); EOL("DW_LNS_set_file");
-        EmitULEB128(Source, "New Source");
+        Asm->OutStreamer.AddComment("DW_LNS_set_file");
+        Asm->EmitInt8(dwarf::DW_LNS_set_file); 
+        Asm->EmitULEB128(Source, "New Source");
       }
 
       // If change of line.
@@ -2675,17 +2833,20 @@
         // If delta is small enough and in range...
         if (Delta >= 0 && Delta < (MaxLineDelta - 1)) {
           // ... then use fast opcode.
-          Asm->EmitInt8(Delta - MinLineDelta); EOL("Line Delta");
+          Asm->OutStreamer.AddComment("Line Delta");
+          Asm->EmitInt8(Delta - MinLineDelta);
         } else {
           // ... otherwise use long hand.
+          Asm->OutStreamer.AddComment("DW_LNS_advance_line");
           Asm->EmitInt8(dwarf::DW_LNS_advance_line);
-          EOL("DW_LNS_advance_line");
-          EmitSLEB128(Offset, "Line Offset");
-          Asm->EmitInt8(dwarf::DW_LNS_copy); EOL("DW_LNS_copy");
+          Asm->EmitSLEB128(Offset, "Line Offset");
+          Asm->OutStreamer.AddComment("DW_LNS_copy");
+          Asm->EmitInt8(dwarf::DW_LNS_copy);
         }
       } else {
         // Copy the previous row (different address or source)
-        Asm->EmitInt8(dwarf::DW_LNS_copy); EOL("DW_LNS_copy");
+        Asm->OutStreamer.AddComment("DW_LNS_copy");
+        Asm->EmitInt8(dwarf::DW_LNS_copy);
       }
     }
 
@@ -2698,85 +2859,91 @@
     // put into it, emit an empty table.
     emitEndOfLineMatrix(1);
 
-  EmitLabel("line_end", 0);
-  Asm->O << '\n';
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_end"));
 }
 
 /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
 ///
 void DwarfDebug::emitCommonDebugFrame() {
-  if (!MAI->doesDwarfRequireFrameSection())
+  if (!Asm->MAI->doesDwarfRequireFrameSection())
     return;
 
-  int stackGrowth =
-    Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
-      TargetFrameInfo::StackGrowsUp ?
-    TD->getPointerSize() : -TD->getPointerSize();
+  int stackGrowth = Asm->getTargetData().getPointerSize();
+  if (Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+      TargetFrameInfo::StackGrowsDown)
+    stackGrowth *= -1;
 
   // Start the dwarf frame section.
   Asm->OutStreamer.SwitchSection(
                               Asm->getObjFileLowering().getDwarfFrameSection());
 
-  EmitLabel("debug_frame_common", 0);
-  EmitDifference("debug_frame_common_end", 0,
-                 "debug_frame_common_begin", 0, true);
-  EOL("Length of Common Information Entry");
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common"));
+  Asm->OutStreamer.AddComment("Length of Common Information Entry");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_frame_common_end"),
+                           Asm->GetTempSymbol("debug_frame_common_begin"), 4);
 
-  EmitLabel("debug_frame_common_begin", 0);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_begin"));
+  Asm->OutStreamer.AddComment("CIE Identifier Tag");
   Asm->EmitInt32((int)dwarf::DW_CIE_ID);
-  EOL("CIE Identifier Tag");
+  Asm->OutStreamer.AddComment("CIE Version");
   Asm->EmitInt8(dwarf::DW_CIE_VERSION);
-  EOL("CIE Version");
+  Asm->OutStreamer.AddComment("CIE Augmentation");
   Asm->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); // nul terminator.
-  EOL("CIE Augmentation");
-  EmitULEB128(1, "CIE Code Alignment Factor");
-  EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+  Asm->EmitULEB128(1, "CIE Code Alignment Factor");
+  Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+  Asm->OutStreamer.AddComment("CIE RA Column");
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false));
-  EOL("CIE RA Column");
 
   std::vector<MachineMove> Moves;
   RI->getInitialFrameState(Moves);
 
-  EmitFrameMoves(NULL, 0, Moves, false);
+  Asm->EmitFrameMoves(Moves, 0, false);
 
   Asm->EmitAlignment(2, 0, 0, false);
-  EmitLabel("debug_frame_common_end", 0);
-  Asm->O << '\n';
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_end"));
 }
 
 /// emitFunctionDebugFrame - Emit per function frame info into a debug frame
 /// section.
-void
-DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
-  if (!MAI->doesDwarfRequireFrameSection())
+void DwarfDebug::
+emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
+  if (!Asm->MAI->doesDwarfRequireFrameSection())
     return;
 
   // Start the dwarf frame section.
   Asm->OutStreamer.SwitchSection(
                               Asm->getObjFileLowering().getDwarfFrameSection());
 
-  EmitDifference("debug_frame_end", DebugFrameInfo.Number,
-                 "debug_frame_begin", DebugFrameInfo.Number, true);
-  EOL("Length of Frame Information Entry");
+  Asm->OutStreamer.AddComment("Length of Frame Information Entry");
+  MCSymbol *DebugFrameBegin =
+    Asm->GetTempSymbol("debug_frame_begin", DebugFrameInfo.Number);
+  MCSymbol *DebugFrameEnd =
+    Asm->GetTempSymbol("debug_frame_end", DebugFrameInfo.Number);
+  Asm->EmitLabelDifference(DebugFrameEnd, DebugFrameBegin, 4);
 
-  EmitLabel("debug_frame_begin", DebugFrameInfo.Number);
+  Asm->OutStreamer.EmitLabel(DebugFrameBegin);
 
-  EmitSectionOffset("debug_frame_common", "section_debug_frame",
-                    0, 0, true, false);
-  EOL("FDE CIE offset");
+  Asm->OutStreamer.AddComment("FDE CIE offset");
+  Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"), 
+                         DwarfFrameSectionSym);
 
-  EmitReference("func_begin", DebugFrameInfo.Number);
-  EOL("FDE initial location");
-  EmitDifference("func_end", DebugFrameInfo.Number,
-                 "func_begin", DebugFrameInfo.Number);
-  EOL("FDE address range");
+  Asm->OutStreamer.AddComment("FDE initial location");
+  MCSymbol *FuncBeginSym =
+    Asm->GetTempSymbol("func_begin", DebugFrameInfo.Number);
+  Asm->OutStreamer.EmitSymbolValue(FuncBeginSym,
+                                   Asm->getTargetData().getPointerSize(),
+                                   0/*AddrSpace*/);
+  
+  
+  Asm->OutStreamer.AddComment("FDE address range");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("func_end",DebugFrameInfo.Number),
+                           FuncBeginSym, Asm->getTargetData().getPointerSize());
 
-  EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves,
-                 false);
+  Asm->EmitFrameMoves(DebugFrameInfo.Moves, FuncBeginSym, false);
 
   Asm->EmitAlignment(2, 0, 0, false);
-  EmitLabel("debug_frame_end", DebugFrameInfo.Number);
-  Asm->O << '\n';
+  Asm->OutStreamer.EmitLabel(DebugFrameEnd);
 }
 
 /// emitDebugPubNames - Emit visible names into a debug pubnames section.
@@ -2786,60 +2953,69 @@
   Asm->OutStreamer.SwitchSection(
                           Asm->getObjFileLowering().getDwarfPubNamesSection());
 
-  EmitDifference("pubnames_end", ModuleCU->getID(),
-                 "pubnames_begin", ModuleCU->getID(), true);
-  EOL("Length of Public Names Info");
+  Asm->OutStreamer.AddComment("Length of Public Names Info");
+  Asm->EmitLabelDifference(
+                 Asm->GetTempSymbol("pubnames_end", ModuleCU->getID()),
+                 Asm->GetTempSymbol("pubnames_begin", ModuleCU->getID()), 4);
 
-  EmitLabel("pubnames_begin", ModuleCU->getID());
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
+                                                ModuleCU->getID()));
 
-  Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("DWARF Version");
+  Asm->OutStreamer.AddComment("DWARF Version");
+  Asm->EmitInt16(dwarf::DWARF_VERSION); 
 
-  EmitSectionOffset("info_begin", "section_info",
-                    ModuleCU->getID(), 0, true, false);
-  EOL("Offset of Compilation Unit Info");
+  Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+  Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()), 
+                         DwarfInfoSectionSym);
 
-  EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(),
-                 true);
-  EOL("Compilation Unit Length");
+  Asm->OutStreamer.AddComment("Compilation Unit Length");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()),
+                           Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
+                           4);
 
   const StringMap<DIE*> &Globals = ModuleCU->getGlobals();
   for (StringMap<DIE*>::const_iterator
          GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
     const char *Name = GI->getKeyData();
-    DIE * Entity = GI->second;
+    DIE *Entity = GI->second;
 
-    Asm->EmitInt32(Entity->getOffset()); EOL("DIE offset");
+    Asm->OutStreamer.AddComment("DIE offset");
+    Asm->EmitInt32(Entity->getOffset());
     
-    if (Asm->VerboseAsm)
+    if (Asm->isVerbose())
       Asm->OutStreamer.AddComment("External Name");
     Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
   }
 
-  Asm->EmitInt32(0); EOL("End Mark");
-  EmitLabel("pubnames_end", ModuleCU->getID());
-  Asm->O << '\n';
+  Asm->OutStreamer.AddComment("End Mark");
+  Asm->EmitInt32(0);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
+                                                ModuleCU->getID()));
 }
 
 void DwarfDebug::emitDebugPubTypes() {
   // Start the dwarf pubnames section.
   Asm->OutStreamer.SwitchSection(
                           Asm->getObjFileLowering().getDwarfPubTypesSection());
-  EmitDifference("pubtypes_end", ModuleCU->getID(),
-                 "pubtypes_begin", ModuleCU->getID(), true);
-  EOL("Length of Public Types Info");
+  Asm->OutStreamer.AddComment("Length of Public Types Info");
+  Asm->EmitLabelDifference(
+                    Asm->GetTempSymbol("pubtypes_end", ModuleCU->getID()),
+                    Asm->GetTempSymbol("pubtypes_begin", ModuleCU->getID()), 4);
 
-  EmitLabel("pubtypes_begin", ModuleCU->getID());
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+                                                ModuleCU->getID()));
 
-  if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DWARF Version");
+  if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
   Asm->EmitInt16(dwarf::DWARF_VERSION);
 
-  EmitSectionOffset("info_begin", "section_info",
-                    ModuleCU->getID(), 0, true, false);
-  EOL("Offset of Compilation ModuleCU Info");
+  Asm->OutStreamer.AddComment("Offset of Compilation ModuleCU Info");
+  Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
+                         DwarfInfoSectionSym);
 
-  EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(),
-                 true);
-  EOL("Compilation ModuleCU Length");
+  Asm->OutStreamer.AddComment("Compilation ModuleCU Length");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()),
+                           Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
+                           4);
 
   const StringMap<DIE*> &Globals = ModuleCU->getGlobalTypes();
   for (StringMap<DIE*>::const_iterator
@@ -2847,39 +3023,46 @@
     const char *Name = GI->getKeyData();
     DIE * Entity = GI->second;
 
-    if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DIE offset");
+    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
     Asm->EmitInt32(Entity->getOffset());
     
-    if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("External Name");
+    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
     Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
   }
 
-  Asm->EmitInt32(0); EOL("End Mark");
-  EmitLabel("pubtypes_end", ModuleCU->getID());
-  Asm->O << '\n';
+  Asm->OutStreamer.AddComment("End Mark");
+  Asm->EmitInt32(0); 
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+                                                ModuleCU->getID()));
 }
 
 /// emitDebugStr - Emit visible names into a debug str section.
 ///
 void DwarfDebug::emitDebugStr() {
   // Check to see if it is worth the effort.
-  if (!StringPool.empty()) {
-    // Start the dwarf str section.
-    Asm->OutStreamer.SwitchSection(
+  if (StringPool.empty()) return;
+  
+  // Start the dwarf str section.
+  Asm->OutStreamer.SwitchSection(
                                 Asm->getObjFileLowering().getDwarfStrSection());
 
-    // For each of strings in the string pool.
-    for (unsigned StringID = 1, N = StringPool.size();
-         StringID <= N; ++StringID) {
-      // Emit a label for reference from debug information entries.
-      EmitLabel("string", StringID);
-
-      // Emit the string itself.
-      const std::string &String = StringPool[StringID];
-      Asm->OutStreamer.EmitBytes(StringRef(String.c_str(), String.size()+1), 0);
-    }
-
-    Asm->O << '\n';
+  // Get all of the string pool entries and put them in an array by their ID so
+  // we can sort them.
+  SmallVector<std::pair<unsigned, 
+      StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+  
+  for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
+       I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
+    Entries.push_back(std::make_pair(I->second.second, &*I));
+  
+  array_pod_sort(Entries.begin(), Entries.end());
+  
+  for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+    // Emit a label for reference from debug information entries.
+    Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
+    
+    // Emit the string itself.
+    Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
   }
 }
 
@@ -2897,33 +3080,6 @@
   // Start the dwarf aranges section.
   Asm->OutStreamer.SwitchSection(
                           Asm->getObjFileLowering().getDwarfARangesSection());
-
-  // FIXME - Mock up
-#if 0
-  CompileUnit *Unit = GetBaseCompileUnit();
-
-  // Don't include size of length
-  Asm->EmitInt32(0x1c); EOL("Length of Address Ranges Info");
-
-  Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("Dwarf Version");
-
-  EmitReference("info_begin", Unit->getID());
-  EOL("Offset of Compilation Unit Info");
-
-  Asm->EmitInt8(TD->getPointerSize()); EOL("Size of Address");
-
-  Asm->EmitInt8(0); EOL("Size of Segment Descriptor");
-
-  Asm->EmitInt16(0);  EOL("Pad (1)");
-  Asm->EmitInt16(0);  EOL("Pad (2)");
-
-  // Range 1
-  EmitReference("text_begin", 0); EOL("Address");
-  EmitDifference("text_end", 0, "text_begin", 0, true); EOL("Length");
-
-  Asm->EmitInt32(0); EOL("EOM (1)");
-  Asm->EmitInt32(0); EOL("EOM (2)");
-#endif
 }
 
 /// emitDebugRanges - Emit visible names into a debug ranges section.
@@ -2931,7 +3087,16 @@
 void DwarfDebug::emitDebugRanges() {
   // Start the dwarf ranges section.
   Asm->OutStreamer.SwitchSection(
-                            Asm->getObjFileLowering().getDwarfRangesSection());
+    Asm->getObjFileLowering().getDwarfRangesSection());
+  for (SmallVector<const MCSymbol *, 8>::const_iterator I = DebugRangeSymbols.begin(),
+         E = DebugRangeSymbols.end(); I != E; ++I) {
+    if (*I) 
+      Asm->EmitLabelDifference(*I, TextSectionSym,
+                               Asm->getTargetData().getPointerSize());
+    else
+      Asm->OutStreamer.EmitIntValue(0, Asm->getTargetData().getPointerSize(), 
+                                    /*addrspace*/0);
+  }
 }
 
 /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
@@ -2963,7 +3128,7 @@
 /// __debug_info section, and the low_pc is the starting address for the
 /// inlining instance.
 void DwarfDebug::emitDebugInlineInfo() {
-  if (!MAI->doesDwarfUsesInlineInfoSection())
+  if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
     return;
 
   if (!ModuleCU)
@@ -2972,14 +3137,16 @@
   Asm->OutStreamer.SwitchSection(
                         Asm->getObjFileLowering().getDwarfDebugInlineSection());
 
-  EmitDifference("debug_inlined_end", 1,
-                 "debug_inlined_begin", 1, true);
-  EOL("Length of Debug Inlined Information Entry");
+  Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1),
+                           Asm->GetTempSymbol("debug_inlined_begin", 1), 4);
 
-  EmitLabel("debug_inlined_begin", 1);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));
 
-  Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("Dwarf Version");
-  Asm->EmitInt8(TD->getPointerSize()); EOL("Address Size (in bytes)");
+  Asm->OutStreamer.AddComment("Dwarf Version");
+  Asm->EmitInt16(dwarf::DWARF_VERSION);
+  Asm->OutStreamer.AddComment("Address Size (in bytes)");
+  Asm->EmitInt8(Asm->getTargetData().getPointerSize());
 
   for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
          E = InlinedSPNodes.end(); I != E; ++I) {
@@ -2992,33 +3159,28 @@
     StringRef LName = SP.getLinkageName();
     StringRef Name = SP.getName();
 
+    Asm->OutStreamer.AddComment("MIPS linkage name");
     if (LName.empty()) {
       Asm->OutStreamer.EmitBytes(Name, 0);
       Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
     } else 
-      EmitSectionOffset("string", "section_str",
-                      StringPool.idFor(getRealLinkageName(LName)), false, true);
+      Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
+                             DwarfStrSectionSym);
 
-    EOL("MIPS linkage name");
-    EmitSectionOffset("string", "section_str",
-                      StringPool.idFor(Name), false, true);
-    EOL("Function name");
-    EmitULEB128(Labels.size(), "Inline count");
+    Asm->OutStreamer.AddComment("Function name");
+    Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+    Asm->EmitULEB128(Labels.size(), "Inline count");
 
     for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
            LE = Labels.end(); LI != LE; ++LI) {
-      DIE *SP = LI->second;
-      Asm->EmitInt32(SP->getOffset()); EOL("DIE offset");
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(LI->second->getOffset());
 
-      if (TD->getPointerSize() == sizeof(int32_t))
-        O << MAI->getData32bitsDirective();
-      else
-        O << MAI->getData64bitsDirective();
-
-      PrintLabelName("label", LI->first); EOL("low_pc");
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
+      Asm->OutStreamer.EmitSymbolValue(LI->first,
+                                       Asm->getTargetData().getPointerSize(),0);
     }
   }
 
-  EmitLabel("debug_inlined_end", 1);
-  Asm->O << '\n';
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 55baa92..bfd6f48 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,18 +14,14 @@
 #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
 #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
 
-#include "DIE.h"
-#include "DwarfPrinter.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Support/raw_ostream.h"
+#include "DIE.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/UniqueVector.h"
-#include <string>
+#include "llvm/Support/Allocator.h"
 
 namespace llvm {
 
@@ -34,9 +30,26 @@
 class DbgScope;
 class DbgVariable;
 class MachineFrameInfo;
+class MachineLocation;
 class MachineModuleInfo;
 class MCAsmInfo;
-class Timer;
+class DIEAbbrev;
+class DIE;
+class DIEBlock;
+class DIEEntry;
+
+class DIEnumerator;
+class DIDescriptor;
+class DIVariable;
+class DIGlobal;
+class DIGlobalVariable;
+class DISubprogram;
+class DIBasicType;
+class DIDerivedType;
+class DIType;
+class DINameSpace;
+class DISubrange;
+class DICompositeType;
 
 //===----------------------------------------------------------------------===//
 /// SrcLineInfo - This class is used to record source line correspondence.
@@ -45,31 +58,29 @@
   unsigned Line;                     // Source line number.
   unsigned Column;                   // Source column.
   unsigned SourceID;                 // Source ID number.
-  unsigned LabelID;                  // Label in code ID number.
+  MCSymbol *Label;                   // Label in code ID number.
 public:
-  SrcLineInfo(unsigned L, unsigned C, unsigned S, unsigned I)
-    : Line(L), Column(C), SourceID(S), LabelID(I) {}
+  SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
+    : Line(L), Column(C), SourceID(S), Label(label) {}
 
   // Accessors
   unsigned getLine() const { return Line; }
   unsigned getColumn() const { return Column; }
   unsigned getSourceID() const { return SourceID; }
-  unsigned getLabelID() const { return LabelID; }
+  MCSymbol *getLabel() const { return Label; }
 };
 
-class DwarfDebug : public DwarfPrinter {
+class DwarfDebug {
+  /// Asm - Target of Dwarf emission.
+  AsmPrinter *Asm;
+
+  /// MMI - Collected machine module information.
+  MachineModuleInfo *MMI;
+
   //===--------------------------------------------------------------------===//
   // Attributes used to construct specific Dwarf sections.
   //
 
-  /// CompileUnitMap - A map of global variables representing compile units to
-  /// compile units.
-  DenseMap<Value *, CompileUnit *> CompileUnitMap;
-
-  /// CompileUnits - All the compile units in this module.
-  ///
-  SmallVector<CompileUnit *, 8> CompileUnits;
-
   /// ModuleCU - All DIEs are inserted in ModuleCU.
   CompileUnit *ModuleCU;
 
@@ -106,13 +117,18 @@
   /// Lines - List of source line correspondence.
   std::vector<SrcLineInfo> Lines;
 
-  /// DIEValues - A list of all the unique values in use.
-  ///
-  std::vector<DIEValue *> DIEValues;
+  /// DIEBlocks - A list of all the DIEBlocks in use.
+  std::vector<DIEBlock *> DIEBlocks;
 
-  /// StringPool - A UniqueVector of strings used by indirect references.
-  ///
-  UniqueVector<std::string> StringPool;
+  // DIEValueAllocator - All DIEValues are allocated through this allocator.
+  BumpPtrAllocator DIEValueAllocator;
+
+  /// StringPool - A String->Symbol mapping of strings used by indirect
+  /// references.
+  StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
+  unsigned NextStringPoolNumber;
+  
+  MCSymbol *getStringPoolEntry(StringRef Str);
 
   /// SectionMap - Provides a unique id per text section.
   ///
@@ -122,19 +138,12 @@
   ///
   std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
 
-  /// didInitial - Flag to indicate if initial emission has been done.
-  ///
-  bool didInitial;
-
-  /// shouldEmit - Flag to indicate if debug information should be emitted.
-  ///
-  bool shouldEmit;
-
   // CurrentFnDbgScope - Top level scope for the current function.
   //
   DbgScope *CurrentFnDbgScope;
   
-  /// DbgScopeMap - Tracks the scopes in the current function.
+  /// DbgScopeMap - Tracks the scopes in the current function.  Owns the
+  /// contained DbgScope*s.
   ///
   DenseMap<MDNode *, DbgScope *> DbgScopeMap;
 
@@ -143,50 +152,57 @@
   DenseMap<MDNode *, DbgScope *> ConcreteScopes;
 
   /// AbstractScopes - Tracks the abstract scopes a module. These scopes are
-  /// not included DbgScopeMap.
+  /// not included DbgScopeMap.  AbstractScopes owns its DbgScope*s.
   DenseMap<MDNode *, DbgScope *> AbstractScopes;
+
+  /// AbstractScopesList - Tracks abstract scopes constructed while processing
+  /// a function. This list is cleared during endFunction().
   SmallVector<DbgScope *, 4>AbstractScopesList;
 
-  /// AbstractVariables - Collection on abstract variables.
+  /// AbstractVariables - Collection on abstract variables.  Owned by the
+  /// DbgScopes in AbstractScopes.
   DenseMap<MDNode *, DbgVariable *> AbstractVariables;
 
+  /// DbgValueStartMap - Tracks starting scope of variable DIEs.
+  /// If the scope of an object begins sometime after the low pc value for the 
+  /// scope most closely enclosing the object, the object entry may have a 
+  /// DW_AT_start_scope attribute.
+  DenseMap<const MachineInstr *, DbgVariable *> DbgValueStartMap;
+
   /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked
   /// (at the end of the module) as DW_AT_inline.
   SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
 
+  /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
+  /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+  /// corresponds to the MDNode mapped with the subprogram DIE.
   DenseMap<DIE *, MDNode *> ContainingTypeMap;
 
-  /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs.
-  SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs;
-
-  /// TopLevelDIEs - Collection of top level DIEs. 
-  SmallPtrSet<DIE *, 4> TopLevelDIEs;
-  SmallVector<DIE *, 4> TopLevelDIEsVector;
-
   typedef SmallVector<DbgScope *, 2> ScopeVector;
-  typedef DenseMap<const MachineInstr *, ScopeVector>
-    InsnToDbgScopeMapTy;
-
-  /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts.
-  InsnToDbgScopeMapTy DbgScopeBeginMap;
-
-  /// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends.
-  InsnToDbgScopeMapTy DbgScopeEndMap;
+  SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet;
+  SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet;
 
   /// InlineInfo - Keep track of inlined functions and their location.  This
   /// information is used to populate debug_inlined section.
-  typedef std::pair<unsigned, DIE *> InlineInfoLabels;
-  DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+  typedef std::pair<MCSymbol*, DIE *> InlineInfoLabels;
+  DenseMap<MDNode*, SmallVector<InlineInfoLabels, 4> > InlineInfo;
   SmallVector<MDNode *, 4> InlinedSPNodes;
 
-  /// CompileUnitOffsets - A vector of the offsets of the compile units. This is
-  /// used when calculating the "origin" of a concrete instance of an inlined
-  /// function.
-  DenseMap<CompileUnit *, unsigned> CompileUnitOffsets;
+  /// InsnBeforeLabelMap - Maps instruction with label emitted before 
+  /// instruction.
+  DenseMap<const MachineInstr *, MCSymbol *> InsnBeforeLabelMap;
 
-  /// DebugTimer - Timer for the Dwarf debug writer.
-  Timer *DebugTimer;
-  
+  /// InsnAfterLabelMap - Maps instruction with label emitted after
+  /// instruction.
+  DenseMap<const MachineInstr *, MCSymbol *> InsnAfterLabelMap;
+
+  SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
+
+  /// Previous instruction's location information. This is used to determine
+  /// label location to indicate scope boundries in dwarf debug info.
+  DebugLoc PrevInstLoc;
+  MCSymbol *PrevLabel;
+
   struct FunctionDebugFrameInfo {
     unsigned Number;
     std::vector<MachineMove> Moves;
@@ -197,6 +213,14 @@
 
   std::vector<FunctionDebugFrameInfo> DebugFrames;
 
+  // Section Symbols: these are assembler temporary labels that are emitted at
+  // the beginning of each supported dwarf section.  These are used to form
+  // section offsets and are created by EmitSectionLabels.
+  MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
+  MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+  
+private:
+  
   /// getSourceDirectoryAndFileIds - Return the directory and file ids that
   /// maps to the source id. Source id starts at 1.
   std::pair<unsigned, unsigned>
@@ -233,7 +257,7 @@
 
   /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
   /// information entry.
-  DIEEntry *createDIEEntry(DIE *Entry = NULL);
+  DIEEntry *createDIEEntry(DIE *Entry);
 
   /// addUInt - Add an unsigned integer attribute data and value.
   ///
@@ -251,30 +275,17 @@
   /// addLabel - Add a Dwarf label attribute data and value.
   ///
   void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
-                const DWLabel &Label);
-
-  /// addObjectLabel - Add an non-Dwarf label attribute data and value.
-  ///
-  void addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
-                      const MCSymbol *Sym);
-
-  /// addSectionOffset - Add a section offset label attribute data and value.
-  ///
-  void addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
-                        const DWLabel &Label, const DWLabel &Section,
-                        bool isEH = false, bool useSet = true);
+                const MCSymbol *Label);
 
   /// addDelta - Add a label delta attribute data and value.
   ///
   void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
-                const DWLabel &Hi, const DWLabel &Lo);
+                const MCSymbol *Hi, const MCSymbol *Lo);
 
   /// addDIEEntry - Add a DIE attribute data and value.
   ///
-  void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
-    Die->addValue(Attribute, Form, createDIEEntry(Entry));
-  }
-
+  void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+  
   /// addBlock - Add block data.
   ///
   void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
@@ -346,7 +357,7 @@
                              DICompositeType *CTy);
 
   /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-  DIE *constructEnumTypeDIE(DIEnumerator *ETy);
+  DIE *constructEnumTypeDIE(DIEnumerator ETy);
 
   /// createGlobalVariableDIE - Create new DIE using GV.
   DIE *createGlobalVariableDIE(const DIGlobalVariable &GV);
@@ -357,13 +368,10 @@
   /// createSubprogramDIE - Create new DIE using SP.
   DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
 
-  /// findCompileUnit - Get the compile unit for the given descriptor. 
-  ///
-  CompileUnit *findCompileUnit(DICompileUnit Unit);
-
   /// getUpdatedDbgScope - Find or create DbgScope assicated with 
   /// the instruction. Initialize scope and update scope hierarchy.
-  DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt);
+  DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
+                               MDNode *InlinedAt);
 
   /// createDbgScope - Create DbgScope for the scope.
   void createDbgScope(MDNode *Scope, MDNode *InlinedAt);
@@ -372,7 +380,9 @@
 
   /// findAbstractVariable - Find abstract variable associated with Var.
   DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx, 
-                                    DILocation &Loc);
+                                    DebugLoc Loc);
+  DbgVariable *findAbstractVariable(DIVariable &Var, const MachineInstr *MI,
+                                    DebugLoc Loc);
 
   /// updateSubprogramScopeDIE - Find DIE for the given subprogram and 
   /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
@@ -395,9 +405,9 @@
   /// constructScopeDIE - Construct a DIE for this scope.
   DIE *constructScopeDIE(DbgScope *Scope);
 
-  /// emitInitial - Emit initial Dwarf declarations.  This is necessary for cc
-  /// tools to recognize the object file contains Dwarf information.
-  void emitInitial();
+  /// EmitSectionLabels - Emit initial Dwarf sections with a label at
+  /// the start of each one.
+  void EmitSectionLabels();
 
   /// emitDIE - Recusively Emits a debug information entry.
   ///
@@ -486,11 +496,11 @@
 
   /// GetOrCreateSourceID - Look up the source id with the given directory and
   /// source file names. If none currently exists, create a new id and insert it
-  /// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps
-  /// as well.
+  /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
+  /// maps as well.
   unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
 
-  CompileUnit *constructCompileUnit(MDNode *N);
+  void constructCompileUnit(MDNode *N);
 
   void constructGlobalVariableDIE(MDNode *N);
 
@@ -502,20 +512,38 @@
   ///
   DIType getBlockByrefType(DIType Ty, std::string Name);
 
+  /// recordSourceLine - Register a source line with debug info. Returns the
+  /// unique label that was emitted and which provides correspondence to
+  /// the source line list.
+  MCSymbol *recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
+  
+  /// getSourceLineCount - Return the number of source lines in the debug
+  /// info.
+  unsigned getSourceLineCount() const {
+    return Lines.size();
+  }
+  
+  /// identifyScopeMarkers() - Indentify instructions that are marking
+  /// beginning of or end of a scope.
+  void identifyScopeMarkers();
+
+  /// extractScopeInformation - Scan machine instructions in this function
+  /// and collect DbgScopes. Return true, if atleast one scope was found.
+  bool extractScopeInformation();
+  
+  /// collectVariableInfo - Populate DbgScope entries with variables' info.
+  void collectVariableInfo();
+  
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
   //
-  DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T);
-  virtual ~DwarfDebug();
-
-  /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
-  /// be emitted.
-  bool ShouldEmitDwarfDebug() const { return shouldEmit; }
+  DwarfDebug(AsmPrinter *A, Module *M);
+  ~DwarfDebug();
 
   /// beginModule - Emit all Dwarf sections that should come prior to the
   /// content.
-  void beginModule(Module *M, MachineModuleInfo *MMI);
+  void beginModule(Module *M);
 
   /// endModule - Emit all Dwarf sections that should come after the content.
   ///
@@ -529,34 +557,8 @@
   ///
   void endFunction(const MachineFunction *MF);
 
-  /// recordSourceLine - Records location information and associates it with a 
-  /// label. Returns a unique label ID used to generate a label and provide
-  /// correspondence to the source line list.
-  unsigned recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
-
-  /// getSourceLineCount - Return the number of source lines in the debug
-  /// info.
-  unsigned getSourceLineCount() const {
-    return Lines.size();
-  }
-                            
-  /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
-  /// timed. Look up the source id with the given directory and source file
-  /// names. If none currently exists, create a new id and insert it in the
-  /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
-  /// well.
-  unsigned getOrCreateSourceID(const std::string &DirName,
-                               const std::string &FileName);
-
-  /// extractScopeInformation - Scan machine instructions in this function
-  /// and collect DbgScopes. Return true, if atleast one scope was found.
-  bool extractScopeInformation();
-
-  /// collectVariableInfo - Populate DbgScope entries with variables' info.
-  void collectVariableInfo();
-
-  /// beginScope - Process beginning of a scope starting at Label.
-  void beginScope(const MachineInstr *MI, unsigned Label);
+  /// beginScope - Process beginning of a scope.
+  void beginScope(const MachineInstr *MI);
 
   /// endScope - Prcess end of a scope.
   void endScope(const MachineInstr *MI);
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 2b08ba4..cb6db0f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -13,6 +13,7 @@
 
 #include "DwarfException.h"
 #include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -27,82 +28,61 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/Timer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
 
-DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A,
-                               const MCAsmInfo *T)
-  : DwarfPrinter(OS, A, T, "eh"), shouldEmitTable(false),shouldEmitMoves(false),
-    shouldEmitTableModule(false), shouldEmitMovesModule(false),
-    ExceptionTimer(0) {
-  if (TimePassesIsEnabled)
-    ExceptionTimer = new Timer("DWARF Exception Writer");
-}
+DwarfException::DwarfException(AsmPrinter *A)
+  : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false),
+    shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
 
-DwarfException::~DwarfException() {
-  delete ExceptionTimer;
-}
-
-/// CreateLabelDiff - Emit a label and subtract it from the expression we
-/// already have.  This is equivalent to emitting "foo - .", but we have to emit
-/// the label for "." directly.
-const MCExpr *DwarfException::CreateLabelDiff(const MCExpr *ExprRef,
-                                              const char *LabelName,
-                                              unsigned Index) {
-  SmallString<64> Name;
-  raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
-                            << LabelName << Asm->getFunctionNumber()
-                            << "_" << Index;
-  MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str());
-  Asm->OutStreamer.EmitLabel(DotSym);
-
-  return MCBinaryExpr::CreateSub(ExprRef,
-                                 MCSymbolRefExpr::Create(DotSym,
-                                                         Asm->OutContext),
-                                 Asm->OutContext);
-}
+DwarfException::~DwarfException() {}
 
 /// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
 /// is shared among many Frame Description Entries.  There is at least one CIE
 /// in every non-empty .debug_frame section.
 void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
   // Size and sign of stack growth.
-  int stackGrowth =
-    Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
-    TargetFrameInfo::StackGrowsUp ?
-    TD->getPointerSize() : -TD->getPointerSize();
+  int stackGrowth = Asm->getTargetData().getPointerSize();
+  if (Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+      TargetFrameInfo::StackGrowsDown)
+    stackGrowth *= -1;
 
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
 
   // Begin eh frame section.
   Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
 
-  if (MAI->is_EHSymbolPrivate())
-    O << MAI->getPrivateGlobalPrefix();
-  O << "EH_frame" << Index << ":\n";
+  MCSymbol *EHFrameSym;
+  if (TLOF.isFunctionEHFrameSymbolPrivate())
+    EHFrameSym = Asm->GetTempSymbol("EH_frame", Index);
+  else
+    EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") + 
+                                                   Twine(Index));
+  Asm->OutStreamer.EmitLabel(EHFrameSym);
   
-  EmitLabel("section_eh_frame", Index);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index));
 
   // Define base labels.
-  EmitLabel("eh_frame_common", Index);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index));
 
   // Define the eh frame length.
-  EmitDifference("eh_frame_common_end", Index,
-                 "eh_frame_common_begin", Index, true);
-  EOL("Length of Common Information Entry");
+  Asm->OutStreamer.AddComment("Length of Common Information Entry");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index),
+                           Asm->GetTempSymbol("eh_frame_common_begin", Index),
+                           4);
 
   // EH frame header.
-  EmitLabel("eh_frame_common_begin", Index);
-  if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("CIE Identifier Tag");
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index));
+  Asm->OutStreamer.AddComment("CIE Identifier Tag");
   Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
-  if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DW_CIE_VERSION");
+  Asm->OutStreamer.AddComment("DW_CIE_VERSION");
   Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/);
 
   // The personality presence indicates that language specific information will
@@ -120,7 +100,7 @@
   if (PersonalityFn) {
     // There is a personality function.
     *APtr++ = 'P';
-    AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding);
+    AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding);
   }
 
   if (UsesLSDA[Index]) {
@@ -138,41 +118,43 @@
   if (APtr != Augmentation + 1)
     Augmentation[0] = 'z';
 
+  Asm->OutStreamer.AddComment("CIE Augmentation");
   Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0);
-  EOL("CIE Augmentation");
 
   // Round out reader.
-  EmitULEB128(1, "CIE Code Alignment Factor");
-  EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+  Asm->EmitULEB128(1, "CIE Code Alignment Factor");
+  Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+  Asm->OutStreamer.AddComment("CIE Return Address Column");
+
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
-  EOL("CIE Return Address Column");
 
   if (Augmentation[0]) {
-    EmitULEB128(AugmentationSize, "Augmentation Size");
+    Asm->EmitULEB128(AugmentationSize, "Augmentation Size");
 
     // If there is a personality, we need to indicate the function's location.
     if (PersonalityFn) {
-      EmitEncodingByte(PerEncoding, "Personality");
-      EmitReference(PersonalityFn, PerEncoding);
-      EOL("Personality");
+      Asm->EmitEncodingByte(PerEncoding, "Personality");
+      Asm->OutStreamer.AddComment("Personality");
+      Asm->EmitReference(PersonalityFn, PerEncoding);
     }
     if (UsesLSDA[Index])
-      EmitEncodingByte(LSDAEncoding, "LSDA");
+      Asm->EmitEncodingByte(LSDAEncoding, "LSDA");
     if (FDEEncoding != dwarf::DW_EH_PE_absptr)
-      EmitEncodingByte(FDEEncoding, "FDE");
+      Asm->EmitEncodingByte(FDEEncoding, "FDE");
   }
 
   // Indicate locations of general callee saved registers in frame.
   std::vector<MachineMove> Moves;
   RI->getInitialFrameState(Moves);
-  EmitFrameMoves(NULL, 0, Moves, true);
+  Asm->EmitFrameMoves(Moves, 0, true);
 
   // On Darwin the linker honors the alignment of eh_frame, which means it must
   // be 8-byte on 64-bit targets to match what gcc does.  Otherwise you get
   // holes which confuse readers of eh_frame.
-  Asm->EmitAlignment(TD->getPointerSize() == 4 ? 2 : 3, 0, 0, false);
-  EmitLabel("eh_frame_common_end", Index);
-  Asm->O << '\n';
+  Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3,
+                     0, 0, false);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index));
 }
 
 /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
@@ -190,17 +172,17 @@
 
   // Externally visible entry into the functions eh frame info. If the
   // corresponding function is static, this should not be externally visible.
-  if (!TheFunc->hasLocalLinkage())
-    if (const char *GlobalEHDirective = MAI->getGlobalEHDirective())
-      O << GlobalEHDirective << *EHFrameInfo.FunctionEHSym << '\n';
+  if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal())
+    Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global);
 
   // If corresponding function is weak definition, this should be too.
-  if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective())
-    O << MAI->getWeakDefDirective() << *EHFrameInfo.FunctionEHSym << '\n';
+  if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective())
+    Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+                                         MCSA_WeakDefinition);
 
   // If corresponding function is hidden, this should be too.
   if (TheFunc->hasHiddenVisibility())
-    if (MCSymbolAttr HiddenAttr = MAI->getHiddenVisibilityAttr())
+    if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr())
       Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
                                            HiddenAttr);
 
@@ -210,65 +192,73 @@
   // info is to be available for non-EH uses.
   if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory &&
       (!TheFunc->isWeakForLinker() ||
-       !MAI->getWeakDefDirective() ||
-       MAI->getSupportsWeakOmittedEHFrame())) {
-    O << *EHFrameInfo.FunctionEHSym << " = 0\n";
+       !Asm->MAI->getWeakDefDirective() ||
+       TLOF.getSupportsWeakOmittedEHFrame())) {
+    Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym,
+                                    MCConstantExpr::Create(0, Asm->OutContext));
     // This name has no connection to the function, so it might get
     // dead-stripped when the function is not, erroneously.  Prohibit
     // dead-stripping unconditionally.
-    if (MAI->hasNoDeadStrip())
+    if (Asm->MAI->hasNoDeadStrip())
       Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
                                            MCSA_NoDeadStrip);
   } else {
-    O << *EHFrameInfo.FunctionEHSym << ":\n";
+    Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym);
 
     // EH frame header.
-    EmitDifference("eh_frame_end", EHFrameInfo.Number,
-                   "eh_frame_begin", EHFrameInfo.Number,
-                   true);
-    EOL("Length of Frame Information Entry");
+    Asm->OutStreamer.AddComment("Length of Frame Information Entry");
+    Asm->EmitLabelDifference(
+                Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number),
+                Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4);
 
-    EmitLabel("eh_frame_begin", EHFrameInfo.Number);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin",
+                                                  EHFrameInfo.Number));
 
-    EmitSectionOffset("eh_frame_begin", "eh_frame_common",
-                      EHFrameInfo.Number, EHFrameInfo.PersonalityIndex,
-                      true, true, false);
+    Asm->OutStreamer.AddComment("FDE CIE offset");
+    Asm->EmitLabelDifference(
+                       Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number),
+                       Asm->GetTempSymbol("eh_frame_common",
+                                          EHFrameInfo.PersonalityIndex), 4);
 
-    EOL("FDE CIE offset");
+    MCSymbol *EHFuncBeginSym =
+      Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number);
 
-    EmitReference("eh_func_begin", EHFrameInfo.Number, FDEEncoding);
-    EOL("FDE initial location");
-    EmitDifference("eh_func_end", EHFrameInfo.Number,
-                   "eh_func_begin", EHFrameInfo.Number,
-                   SizeOfEncodedValue(FDEEncoding) == 4);
-    EOL("FDE address range");
+    Asm->OutStreamer.AddComment("FDE initial location");
+    Asm->EmitReference(EHFuncBeginSym, FDEEncoding);
+    
+    Asm->OutStreamer.AddComment("FDE address range");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end",
+                                                EHFrameInfo.Number),
+                             EHFuncBeginSym,
+                             Asm->GetSizeOfEncodedValue(FDEEncoding));
 
     // If there is a personality and landing pads then point to the language
     // specific data area in the exception table.
     if (MMI->getPersonalities()[0] != NULL) {
-      unsigned Size = SizeOfEncodedValue(LSDAEncoding);
+      unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding);
 
-      EmitULEB128(Size, "Augmentation size");
+      Asm->EmitULEB128(Size, "Augmentation size");
+      Asm->OutStreamer.AddComment("Language Specific Data Area");
       if (EHFrameInfo.hasLandingPads)
-        EmitReference("exception", EHFrameInfo.Number, LSDAEncoding);
+        Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number),
+                           LSDAEncoding);
       else
         Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
 
-      EOL("Language Specific Data Area");
     } else {
-      EmitULEB128(0, "Augmentation size");
+      Asm->EmitULEB128(0, "Augmentation size");
     }
 
     // Indicate locations of function specific callee saved registers in frame.
-    EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves,
-                   true);
+    Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true);
 
     // On Darwin the linker honors the alignment of eh_frame, which means it
     // must be 8-byte on 64-bit targets to match what gcc does.  Otherwise you
     // get holes which confuse readers of eh_frame.
-    Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3,
+    Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3,
                        0, 0, false);
-    EmitLabel("eh_frame_end", EHFrameInfo.Number);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
+                                                  EHFrameInfo.Number));
 
     // If the function is marked used, this table should be also.  We cannot
     // make the mark unconditional in this case, since retaining the table also
@@ -276,11 +266,11 @@
     // on unused functions (calling undefined externals) being dead-stripped to
     // link correctly.  Yes, there really is.
     if (MMI->isUsedFunction(EHFrameInfo.function))
-      if (MAI->hasNoDeadStrip())
+      if (Asm->MAI->hasNoDeadStrip())
         Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
                                              MCSA_NoDeadStrip);
   }
-  Asm->O << '\n';
+  Asm->OutStreamer.AddBlankLine();
 }
 
 /// SharedTypeIds - How many leading type ids two landing pads have in common.
@@ -362,7 +352,7 @@
          I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
     const LandingPadInfo *LPI = *I;
     const std::vector<int> &TypeIds = LPI->TypeIds;
-    const unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+    unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
     unsigned SizeSiteActions = 0;
 
     if (NumShared < TypeIds.size()) {
@@ -370,7 +360,7 @@
       unsigned PrevAction = (unsigned)-1;
 
       if (NumShared) {
-        const unsigned SizePrevIds = PrevLPI->TypeIds.size();
+        unsigned SizePrevIds = PrevLPI->TypeIds.size();
         assert(Actions.size());
         PrevAction = Actions.size() - 1;
         SizeAction =
@@ -433,23 +423,24 @@
   for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
     const MachineOperand &MO = MI->getOperand(I);
 
-    if (MO.isGlobal()) {
-      if (Function *F = dyn_cast<Function>(MO.getGlobal())) {
-        if (SawFunc) {
-          // Be conservative. If we have more than one function operand for this
-          // call, then we can't make the assumption that it's the callee and
-          // not a parameter to the call.
-          // 
-          // FIXME: Determine if there's a way to say that `F' is the callee or
-          // parameter.
-          MarkedNoUnwind = false;
-          break;
-        }
+    if (!MO.isGlobal()) continue;
+    
+    const Function *F = dyn_cast<Function>(MO.getGlobal());
+    if (F == 0) continue;
 
-        MarkedNoUnwind = F->doesNotThrow();
-        SawFunc = true;
-      }
+    if (SawFunc) {
+      // Be conservative. If we have more than one function operand for this
+      // call, then we can't make the assumption that it's the callee and
+      // not a parameter to the call.
+      // 
+      // FIXME: Determine if there's a way to say that `F' is the callee or
+      // parameter.
+      MarkedNoUnwind = false;
+      break;
     }
+
+    MarkedNoUnwind = F->doesNotThrow();
+    SawFunc = true;
   }
 
   return MarkedNoUnwind;
@@ -468,7 +459,7 @@
                      const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
                      const SmallVectorImpl<unsigned> &FirstActions) {
   // The end label of the previous invoke or nounwind try-range.
-  unsigned LastLabel = 0;
+  MCSymbol *LastLabel = 0;
 
   // Whether there is a potentially throwing instruction (currently this means
   // an ordinary call) between the end of the previous try-range and now.
@@ -478,21 +469,18 @@
   bool PreviousIsInvoke = false;
 
   // Visit all instructions in order of address.
-  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
        I != E; ++I) {
     for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
          MI != E; ++MI) {
       if (!MI->isLabel()) {
         if (MI->getDesc().isCall())
           SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
-
         continue;
       }
 
-      unsigned BeginLabel = MI->getOperand(0).getImm();
-      assert(BeginLabel && "Invalid label!");
-
       // End of the previous try-range?
+      MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
       if (BeginLabel == LastLabel)
         SawPotentiallyThrowing = false;
 
@@ -512,7 +500,7 @@
       // create a call-site entry with no landing pad for the region between the
       // try-ranges.
       if (SawPotentiallyThrowing &&
-          MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+          Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
         CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
         CallSites.push_back(Site);
         PreviousIsInvoke = false;
@@ -521,7 +509,10 @@
       LastLabel = LandingPad->EndLabels[P.RangeIndex];
       assert(BeginLabel && LastLabel && "Invalid landing pad!");
 
-      if (LandingPad->LandingPadLabel) {
+      if (!LandingPad->LandingPadLabel) {
+        // Create a gap.
+        PreviousIsInvoke = false;
+      } else {
         // This try-range is for an invoke.
         CallSiteEntry Site = {
           BeginLabel,
@@ -532,7 +523,7 @@
 
         // Try to merge with the previous call-site. SJLJ doesn't do this
         if (PreviousIsInvoke &&
-          MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+          Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
           CallSiteEntry &Prev = CallSites.back();
           if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
             // Extend the range of the previous entry.
@@ -542,7 +533,7 @@
         }
 
         // Otherwise, create a new call-site.
-        if (MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf)
+        if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf)
           CallSites.push_back(Site);
         else {
           // SjLj EH must maintain the call sites in the order assigned
@@ -553,9 +544,6 @@
           CallSites[SiteNo - 1] = Site;
         }
         PreviousIsInvoke = true;
-      } else {
-        // Create a gap.
-        PreviousIsInvoke = false;
       }
     }
   }
@@ -564,7 +552,7 @@
   // function may throw, create a call-site entry with no landing pad for the
   // region following the try-range.
   if (SawPotentiallyThrowing &&
-      MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+      Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
     CallSiteEntry Site = { LastLabel, 0, 0, 0 };
     CallSites.push_back(Site);
   }
@@ -591,10 +579,9 @@
 ///  3. Type ID table contains references to all the C++ typeinfo for all
 ///     catches in the function.  This tables is reverse indexed base 1.
 void DwarfException::EmitExceptionTable() {
-  const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+  const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
   const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
-  if (PadInfos.empty()) return;
 
   // Sort the landing pads in order of their type ids.  This is used to fold
   // duplicate actions.
@@ -619,7 +606,7 @@
   for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
     const LandingPadInfo *LandingPad = LandingPads[i];
     for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
-      unsigned BeginLabel = LandingPad->BeginLabels[j];
+      MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
       assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
       PadRange P = { i, j };
       PadMap[BeginLabel] = P;
@@ -633,18 +620,19 @@
   // Final tallies.
 
   // Call sites.
-  const unsigned SiteStartSize  = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
-  const unsigned SiteLengthSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
-  const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
-  bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+  bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
   bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+  
   unsigned CallSiteTableLength;
-
   if (IsSJLJ)
     CallSiteTableLength = 0;
-  else
-    CallSiteTableLength = CallSites.size() *
-      (SiteStartSize + SiteLengthSize + LandingPadSize);
+  else {
+    unsigned SiteStartSize  = 4; // dwarf::DW_EH_PE_udata4
+    unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+    unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+    CallSiteTableLength = 
+      CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+  }
 
   for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
     CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
@@ -661,7 +649,8 @@
     // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
     // that we're omitting that bit.
     TTypeEncoding = dwarf::DW_EH_PE_omit;
-    TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr);
+    // dwarf::DW_EH_PE_absptr
+    TypeFormatSize = Asm->getTargetData().getPointerSize();
   } else {
     // Okay, we have actual filters or typeinfos to emit.  As such, we need to
     // pick a type encoding for them.  We're about to emit a list of pointers to
@@ -691,7 +680,7 @@
     // in target-independent code.
     //
     TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
-    TypeFormatSize = SizeOfEncodedValue(TTypeEncoding);
+    TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
   }
 
   // Begin the exception table.
@@ -699,19 +688,20 @@
   Asm->EmitAlignment(2, 0, 0, false);
 
   // Emit the LSDA.
-  O << "GCC_except_table" << SubprogramCount << ":\n";
-  EmitLabel("exception", SubprogramCount);
+  MCSymbol *GCCETSym = 
+    Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
+                                      Twine(Asm->getFunctionNumber()));
+  Asm->OutStreamer.EmitLabel(GCCETSym);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
+                                                Asm->getFunctionNumber()));
 
-  if (IsSJLJ) {
-    SmallString<16> LSDAName;
-    raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
-      "_LSDA_" << Asm->getFunctionNumber();
-    O << LSDAName.str() << ":\n";
-  }
+  if (IsSJLJ)
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
+                                                  Asm->getFunctionNumber()));
 
   // Emit the LSDA header.
-  EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
-  EmitEncodingByte(TTypeEncoding, "@TType");
+  Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+  Asm->EmitEncodingByte(TTypeEncoding, "@TType");
 
   // The type infos need to be aligned. GCC does this by inserting padding just
   // before the type infos. However, this changes the size of the exception
@@ -748,16 +738,16 @@
   if (HaveTTData) {
     // Account for any extra padding that will be added to the call site table
     // length.
-    EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+    Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
     SizeAlign = 0;
   }
 
   // SjLj Exception handling
   if (IsSJLJ) {
-    EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
 
     // Add extra padding if it wasn't added to the TType base offset.
-    EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
 
     // Emit the landing pad site information.
     unsigned idx = 0;
@@ -767,16 +757,16 @@
 
       // Offset of the landing pad, counted in 16-byte bundles relative to the
       // @LPStart address.
-      EmitULEB128(idx, "Landing pad");
+      Asm->EmitULEB128(idx, "Landing pad");
 
       // Offset of the first associated action record, relative to the start of
       // the action table. This value is biased by 1 (1 indicates the start of
       // the action table), and 0 indicates that there are no actions.
-      EmitULEB128(S.Action, "Action");
+      Asm->EmitULEB128(S.Action, "Action");
     }
   } else {
     // DWARF Exception handling
-    assert(MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf);
+    assert(Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf);
 
     // The call-site table is a list of all call sites that may throw an
     // exception (including C++ 'throw' statements) in the procedure
@@ -797,100 +787,101 @@
     // supposed to throw.
 
     // Emit the landing pad call site table.
-    EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
 
     // Add extra padding if it wasn't added to the TType base offset.
-    EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
 
     for (SmallVectorImpl<CallSiteEntry>::const_iterator
          I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
       const CallSiteEntry &S = *I;
-      const char *BeginTag;
-      unsigned BeginNumber;
-
-      if (!S.BeginLabel) {
-        BeginTag = "eh_func_begin";
-        BeginNumber = SubprogramCount;
-      } else {
-        BeginTag = "label";
-        BeginNumber = S.BeginLabel;
-      }
-
+      
+      MCSymbol *EHFuncBeginSym =
+        Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+      
+      MCSymbol *BeginLabel = S.BeginLabel;
+      if (BeginLabel == 0)
+        BeginLabel = EHFuncBeginSym;
+      MCSymbol *EndLabel = S.EndLabel;
+      if (EndLabel == 0)
+        EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+        
       // Offset of the call site relative to the previous call site, counted in
       // number of 16-byte bundles. The first call site is counted relative to
       // the start of the procedure fragment.
-      EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount,
-                        true, true);
-      EOL("Region start");
+      Asm->OutStreamer.AddComment("Region start");
+      Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+      
+      Asm->OutStreamer.AddComment("Region length");
+      Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
 
-      if (!S.EndLabel)
-        EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber,
-                       true);
-      else
-        EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true);
-
-      EOL("Region length");
 
       // Offset of the landing pad, counted in 16-byte bundles relative to the
       // @LPStart address.
-      if (!S.PadLabel) {
-        Asm->OutStreamer.AddComment("Landing pad");
+      Asm->OutStreamer.AddComment("Landing pad");
+      if (!S.PadLabel)
         Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
-      } else {
-        EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount,
-                          true, true);
-        EOL("Landing pad");
-      }
+      else
+        Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
 
       // Offset of the first associated action record, relative to the start of
       // the action table. This value is biased by 1 (1 indicates the start of
       // the action table), and 0 indicates that there are no actions.
-      EmitULEB128(S.Action, "Action");
+      Asm->EmitULEB128(S.Action, "Action");
     }
   }
 
   // Emit the Action Table.
-  if (Actions.size() != 0) EOL("-- Action Record Table --");
+  if (Actions.size() != 0) {
+    Asm->OutStreamer.AddComment("-- Action Record Table --");
+    Asm->OutStreamer.AddBlankLine();
+  }
+  
   for (SmallVectorImpl<ActionEntry>::const_iterator
          I = Actions.begin(), E = Actions.end(); I != E; ++I) {
     const ActionEntry &Action = *I;
-    EOL("Action Record:");
+    Asm->OutStreamer.AddComment("Action Record");
+    Asm->OutStreamer.AddBlankLine();
 
     // Type Filter
     //
     //   Used by the runtime to match the type of the thrown exception to the
     //   type of the catch clauses or the types in the exception specification.
-    EmitSLEB128(Action.ValueForTypeID, "  TypeInfo index");
+    Asm->EmitSLEB128(Action.ValueForTypeID, "  TypeInfo index");
 
     // Action Record
     //
     //   Self-relative signed displacement in bytes of the next action record,
     //   or 0 if there is no next action record.
-    EmitSLEB128(Action.NextAction, "  Next action");
+    Asm->EmitSLEB128(Action.NextAction, "  Next action");
   }
 
   // Emit the Catch TypeInfos.
-  if (!TypeInfos.empty()) EOL("-- Catch TypeInfos --");
-  for (std::vector<GlobalVariable *>::const_reverse_iterator
+  if (!TypeInfos.empty()) {
+    Asm->OutStreamer.AddComment("-- Catch TypeInfos --");
+    Asm->OutStreamer.AddBlankLine();
+  }
+  for (std::vector<const GlobalVariable *>::const_reverse_iterator
          I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
     const GlobalVariable *GV = *I;
 
-    if (GV) {
-      EmitReference(GV, TTypeEncoding);
-      EOL("TypeInfo");
-    } else {
-      PrintRelDirective(TTypeEncoding);
-      O << "0x0";
-      EOL("");
-    }
+    Asm->OutStreamer.AddComment("TypeInfo");
+    if (GV)
+      Asm->EmitReference(GV, TTypeEncoding);
+    else
+      Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding),
+                                    0);
   }
 
   // Emit the Exception Specifications.
-  if (!FilterIds.empty()) EOL("-- Filter IDs --");
+  if (!FilterIds.empty()) {
+    Asm->OutStreamer.AddComment("-- Filter IDs --");
+    Asm->OutStreamer.AddBlankLine();
+  }
   for (std::vector<unsigned>::const_iterator
          I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
     unsigned TypeID = *I;
-    EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
+    Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
   }
 
   Asm->EmitAlignment(2, 0, 0, false);
@@ -899,16 +890,13 @@
 /// EndModule - Emit all exception information that should come after the
 /// content.
 void DwarfException::EndModule() {
-  if (MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
+  if (Asm->MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
     return;
 
   if (!shouldEmitMovesModule && !shouldEmitTableModule)
     return;
 
-  if (TimePassesIsEnabled)
-    ExceptionTimer->startTimer();
-
-  const std::vector<Function *> Personalities = MMI->getPersonalities();
+  const std::vector<const Function *> Personalities = MMI->getPersonalities();
 
   for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
     EmitCIE(Personalities[I], I);
@@ -916,42 +904,27 @@
   for (std::vector<FunctionEHFrameInfo>::iterator
          I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
     EmitFDE(*I);
-
-  if (TimePassesIsEnabled)
-    ExceptionTimer->stopTimer();
 }
 
 /// BeginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
 void DwarfException::BeginFunction(const MachineFunction *MF) {
-  if (!MMI || !MAI->doesSupportExceptionHandling()) return;
-
-  if (TimePassesIsEnabled)
-    ExceptionTimer->startTimer();
-
-  this->MF = MF;
   shouldEmitTable = shouldEmitMoves = false;
 
-  // Map all labels and get rid of any dead landing pads.
-  MMI->TidyLandingPads();
-
   // If any landing pads survive, we need an EH table.
-  if (!MMI->getLandingPads().empty())
-    shouldEmitTable = true;
+  shouldEmitTable = !MMI->getLandingPads().empty();
 
   // See if we need frame move info.
-  if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory)
-    shouldEmitMoves = true;
+  shouldEmitMoves =
+    !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
 
   if (shouldEmitMoves || shouldEmitTable)
     // Assumes in correct section after the entry point.
-    EmitLabel("eh_func_begin", ++SubprogramCount);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                  Asm->getFunctionNumber()));
 
   shouldEmitTableModule |= shouldEmitTable;
   shouldEmitMovesModule |= shouldEmitMoves;
-
-  if (TimePassesIsEnabled)
-    ExceptionTimer->stopTimer();
 }
 
 /// EndFunction - Gather and emit post-function exception information.
@@ -959,27 +932,30 @@
 void DwarfException::EndFunction() {
   if (!shouldEmitMoves && !shouldEmitTable) return;
 
-  if (TimePassesIsEnabled)
-    ExceptionTimer->startTimer();
-
-  EmitLabel("eh_func_end", SubprogramCount);
-  EmitExceptionTable();
-
-  MCSymbol *FunctionEHSym =
-    Asm->GetSymbolWithGlobalValueBase(MF->getFunction(), ".eh",
-                                      Asm->MAI->is_EHSymbolPrivate());
-  
-  // Save EH frame information
-  EHFrames.push_back(FunctionEHFrameInfo(FunctionEHSym, SubprogramCount,
-                                         MMI->getPersonalityIndex(),
-                                         MF->getFrameInfo()->hasCalls(),
-                                         !MMI->getLandingPads().empty(),
-                                         MMI->getFrameMoves(),
-                                         MF->getFunction()));
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+                                                Asm->getFunctionNumber()));
 
   // Record if this personality index uses a landing pad.
-  UsesLSDA[MMI->getPersonalityIndex()] |= !MMI->getLandingPads().empty();
+  bool HasLandingPad = !MMI->getLandingPads().empty();
+  UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
+  
+  // Map all labels and get rid of any dead landing pads.
+  MMI->TidyLandingPads();
 
-  if (TimePassesIsEnabled)
-    ExceptionTimer->stopTimer();
+  if (HasLandingPad)
+    EmitExceptionTable();
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  MCSymbol *FunctionEHSym =
+    Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh",
+                                      TLOF.isFunctionEHFrameSymbolPrivate());
+  
+  // Save EH frame information
+  EHFrames.push_back(FunctionEHFrameInfo(FunctionEHSym,
+                                         Asm->getFunctionNumber(),
+                                         MMI->getPersonalityIndex(),
+                                         Asm->MF->getFrameInfo()->hasCalls(),
+                                         !MMI->getLandingPads().empty(),
+                                         MMI->getFrameMoves(),
+                                         Asm->MF->getFunction()));
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 3db1a00..5839f8c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -14,25 +14,33 @@
 #ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
 #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
 
-#include "DIE.h"
-#include "DwarfPrinter.h"
-#include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/ADT/DenseMap.h"
-#include <string>
+#include <vector>
 
 namespace llvm {
 
+template <typename T> class SmallVectorImpl;
 struct LandingPadInfo;
 class MachineModuleInfo;
+class MachineMove;
+class MachineInstr;
+class MachineFunction;
 class MCAsmInfo;
 class MCExpr;
-class Timer;
-class raw_ostream;
+class MCSymbol;
+class Function;
+class AsmPrinter;
 
 //===----------------------------------------------------------------------===//
 /// DwarfException - Emits Dwarf exception handling directives.
 ///
-class DwarfException : public DwarfPrinter {
+class DwarfException {
+  /// Asm - Target of Dwarf emission.
+  AsmPrinter *Asm;
+
+  /// MMI - Collected machine module information.
+  MachineModuleInfo *MMI;
+
   struct FunctionEHFrameInfo {
     MCSymbol *FunctionEHSym;  // L_foo.eh
     unsigned Number;
@@ -73,9 +81,6 @@
   /// should be emitted.
   bool shouldEmitMovesModule;
 
-  /// ExceptionTimer - Timer for the Dwarf exception writer.
-  Timer *ExceptionTimer;
-
   /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
   /// that is shared among many Frame Description Entries.  There is at least
   /// one CIE in every non-empty .debug_frame section.
@@ -111,13 +116,6 @@
   /// PadLT - Order landing pads lexicographically by type id.
   static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
 
-  struct KeyInfo {
-    static inline unsigned getEmptyKey() { return -1U; }
-    static inline unsigned getTombstoneKey() { return -2U; }
-    static unsigned getHashValue(const unsigned &Key) { return Key; }
-    static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
-  };
-
   /// PadRange - Structure holding a try-range and the associated landing pad.
   struct PadRange {
     // The index of the landing pad.
@@ -126,7 +124,7 @@
     unsigned RangeIndex;
   };
 
-  typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+  typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
 
   /// ActionEntry - Structure describing an entry in the actions table.
   struct ActionEntry {
@@ -138,11 +136,11 @@
   /// CallSiteEntry - Structure describing an entry in the call-site table.
   struct CallSiteEntry {
     // The 'try-range' is BeginLabel .. EndLabel.
-    unsigned BeginLabel; // zero indicates the start of the function.
-    unsigned EndLabel;   // zero indicates the end of the function.
+    MCSymbol *BeginLabel; // zero indicates the start of the function.
+    MCSymbol *EndLabel;   // zero indicates the end of the function.
 
     // The landing pad starts at PadLabel.
-    unsigned PadLabel;   // zero indicates that there is no landing pad.
+    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
     unsigned Action;
   };
 
@@ -169,24 +167,12 @@
                             const SmallVectorImpl<unsigned> &FirstActions);
   void EmitExceptionTable();
 
-  /// CreateLabelDiff - Emit a label and subtract it from the expression we
-  /// already have.  This is equivalent to emitting "foo - .", but we have to
-  /// emit the label for "." directly.
-  const MCExpr *CreateLabelDiff(const MCExpr *ExprRef, const char *LabelName,
-                                unsigned Index);
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
   //
-  DwarfException(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T);
-  virtual ~DwarfException();
-
-  /// BeginModule - Emit all exception information that should come prior to the
-  /// content.
-  void BeginModule(Module *m, MachineModuleInfo *mmi) {
-    this->M = m;
-    this->MMI = mmi;
-  }
+  DwarfException(AsmPrinter *A);
+  ~DwarfException();
 
   /// EndModule - Emit all exception information that should come after the
   /// content.
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
deleted file mode 100644
index 6e9293a..0000000
--- a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-//===--- lib/CodeGen/DwarfLabel.cpp - Dwarf Label -------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// DWARF Labels
-// 
-//===----------------------------------------------------------------------===//
-
-#include "DwarfLabel.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-/// Profile - Used to gather unique data for the folding set.
-///
-void DWLabel::Profile(FoldingSetNodeID &ID) const {
-  ID.AddString(Tag);
-  ID.AddInteger(Number);
-}
-
-#ifndef NDEBUG
-void DWLabel::print(raw_ostream &O) const {
-  O << "." << Tag;
-  if (Number) O << Number;
-}
-#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h
deleted file mode 100644
index 0c0cc4b..0000000
--- a/lib/CodeGen/AsmPrinter/DwarfLabel.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===--- lib/CodeGen/DwarfLabel.h - Dwarf Label -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// DWARF Labels.
-// 
-//===----------------------------------------------------------------------===//
-
-#ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__
-#define CODEGEN_ASMPRINTER_DWARFLABEL_H__
-
-namespace llvm {
-  class FoldingSetNodeID;
-  class raw_ostream;
-
-  //===--------------------------------------------------------------------===//
-  /// DWLabel - Labels are used to track locations in the assembler file.
-  /// Labels appear in the form @verbatim <prefix><Tag><Number> @endverbatim,
-  /// where the tag is a category of label (Ex. location) and number is a value
-  /// unique in that category.
-  class DWLabel {
-    /// Tag - Label category tag. Should always be a statically declared C
-    /// string.
-    /// 
-    const char *Tag;
-
-    /// Number - Value to make label unique.
-    /// 
-    unsigned Number;
-  public:
-    DWLabel(const char *T, unsigned N) : Tag(T), Number(N) {}
-
-    // Accessors.
-    const char *getTag() const { return Tag; }
-    unsigned getNumber() const { return Number; }
-
-    /// Profile - Used to gather unique data for the folding set.
-    ///
-    void Profile(FoldingSetNodeID &ID) const;
-
-#ifndef NDEBUG
-    void print(raw_ostream &O) const;
-#endif
-  };
-} // end llvm namespace
-
-#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
deleted file mode 100644
index 28ff0eb..0000000
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ /dev/null
@@ -1,402 +0,0 @@
-//===--- lib/CodeGen/DwarfPrinter.cpp - Dwarf Printer ---------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Emit general DWARF directives.
-//
-//===----------------------------------------------------------------------===//
-
-#include "DwarfPrinter.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
-using namespace llvm;
-
-DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T,
-                           const char *flavor)
-: O(OS), Asm(A), MAI(T), TD(Asm->TM.getTargetData()),
-  RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL),
-  SubprogramCount(0), Flavor(flavor), SetCounter(1) {}
-
-/// SizeOfEncodedValue - Return the size of the encoding in bytes.
-unsigned DwarfPrinter::SizeOfEncodedValue(unsigned Encoding) const {
-  if (Encoding == dwarf::DW_EH_PE_omit)
-    return 0;
-
-  switch (Encoding & 0x07) {
-  case dwarf::DW_EH_PE_absptr:
-    return TD->getPointerSize();
-  case dwarf::DW_EH_PE_udata2:
-    return 2;
-  case dwarf::DW_EH_PE_udata4:
-    return 4;
-  case dwarf::DW_EH_PE_udata8:
-    return 8;
-  }
-
-  assert(0 && "Invalid encoded value.");
-  return 0;
-}
-
-void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const {
-  if (isInSection && MAI->getDwarfSectionOffsetDirective())
-    O << MAI->getDwarfSectionOffsetDirective();
-  else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t))
-    O << MAI->getData32bitsDirective();
-  else
-    O << MAI->getData64bitsDirective();
-}
-
-void DwarfPrinter::PrintRelDirective(unsigned Encoding) const {
-  unsigned Size = SizeOfEncodedValue(Encoding);
-  assert((Size == 4 || Size == 8) && "Do not support other types or rels!");
-
-  O << (Size == 4 ?
-        MAI->getData32bitsDirective() : MAI->getData64bitsDirective());
-}
-
-/// EOL - Print a newline character to asm stream.  If a comment is present
-/// then it will be printed first.  Comments should not contain '\n'.
-void DwarfPrinter::EOL(const Twine &Comment) const {
-  if (Asm->VerboseAsm && !Comment.isTriviallyEmpty()) {
-    Asm->O.PadToColumn(MAI->getCommentColumn());
-    Asm->O << Asm->MAI->getCommentString() << ' ' << Comment;
-  }
-  Asm->O << '\n';
-}
-
-static const char *DecodeDWARFEncoding(unsigned Encoding) {
-  switch (Encoding) {
-  case dwarf::DW_EH_PE_absptr: return "absptr";
-  case dwarf::DW_EH_PE_omit:   return "omit";
-  case dwarf::DW_EH_PE_pcrel:  return "pcrel";
-  case dwarf::DW_EH_PE_udata4: return "udata4";
-  case dwarf::DW_EH_PE_udata8: return "udata8";
-  case dwarf::DW_EH_PE_sdata4: return "sdata4";
-  case dwarf::DW_EH_PE_sdata8: return "sdata8";
-  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
-  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
-  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
-  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
-  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
-    return "indirect pcrel udata4";
-  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
-    return "indirect pcrel sdata4";
-  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
-    return "indirect pcrel udata8";
-  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
-    return "indirect pcrel sdata8";
-  }
-  
-  return "<unknown encoding>";
-}
-
-/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
-/// encoding.  If verbose assembly output is enabled, we output comments
-/// describing the encoding.  Desc is an optional string saying what the
-/// encoding is specifying (e.g. "LSDA").
-void DwarfPrinter::EmitEncodingByte(unsigned Val, const char *Desc) {
-  if (Asm->VerboseAsm) {
-    if (Desc != 0)
-      Asm->OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
-                                  Twine(DecodeDWARFEncoding(Val)));
-    else
-      Asm->OutStreamer.AddComment(Twine("Encoding = ") +
-                                  DecodeDWARFEncoding(Val));
-  }
-
-  Asm->OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
-}
-
-/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
-void DwarfPrinter::EmitCFAByte(unsigned Val) {
-  if (Asm->VerboseAsm) {
-    if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
-      Asm->OutStreamer.AddComment("DW_CFA_offset + Reg (" + 
-                                  Twine(Val-dwarf::DW_CFA_offset) + ")");
-    else
-      Asm->OutStreamer.AddComment(dwarf::CallFrameString(Val));
-  }
-  Asm->OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
-}
-
-/// EmitSLEB128 - emit the specified signed leb128 value.
-void DwarfPrinter::EmitSLEB128(int Value, const char *Desc) const {
-  if (Asm->VerboseAsm && Desc)
-    Asm->OutStreamer.AddComment(Desc);
-    
-  if (MAI->hasLEB128()) {
-    O << "\t.sleb128\t" << Value;
-    Asm->OutStreamer.AddBlankLine();
-    return;
-  }
-
-  // If we don't have .sleb128, emit as .bytes.
-  int Sign = Value >> (8 * sizeof(Value) - 1);
-  bool IsMore;
-  
-  do {
-    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
-    Value >>= 7;
-    IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
-    if (IsMore) Byte |= 0x80;
-    Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
-  } while (IsMore);
-}
-
-/// EmitULEB128 - emit the specified signed leb128 value.
-void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc,
-                               unsigned PadTo) const {
-  if (Asm->VerboseAsm && Desc)
-    Asm->OutStreamer.AddComment(Desc);
- 
-  if (MAI->hasLEB128() && PadTo == 0) {
-    O << "\t.uleb128\t" << Value;
-    Asm->OutStreamer.AddBlankLine();
-    return;
-  }
-  
-  // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
-  do {
-    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
-    Value >>= 7;
-    if (Value || PadTo != 0) Byte |= 0x80;
-    Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
-  } while (Value);
-
-  if (PadTo) {
-    if (PadTo > 1)
-      Asm->OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
-    Asm->OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
-  }
-}
-
-
-/// PrintLabelName - Print label name in form used by Dwarf writer.
-///
-void DwarfPrinter::PrintLabelName(const char *Tag, unsigned Number) const {
-  O << MAI->getPrivateGlobalPrefix() << Tag;
-  if (Number) O << Number;
-}
-void DwarfPrinter::PrintLabelName(const char *Tag, unsigned Number,
-                                  const char *Suffix) const {
-  O << MAI->getPrivateGlobalPrefix() << Tag;
-  if (Number) O << Number;
-  O << Suffix;
-}
-
-/// EmitLabel - Emit location label for internal use by Dwarf.
-///
-void DwarfPrinter::EmitLabel(const char *Tag, unsigned Number) const {
-  PrintLabelName(Tag, Number);
-  O << ":\n";
-}
-
-/// EmitReference - Emit a reference to a label.
-///
-void DwarfPrinter::EmitReference(const char *Tag, unsigned Number,
-                                 bool IsPCRelative, bool Force32Bit) const {
-  PrintRelDirective(Force32Bit);
-  PrintLabelName(Tag, Number);
-  if (IsPCRelative) O << "-" << MAI->getPCSymbol();
-}
-void DwarfPrinter::EmitReference(const std::string &Name, bool IsPCRelative,
-                                 bool Force32Bit) const {
-  PrintRelDirective(Force32Bit);
-  O << Name;
-  if (IsPCRelative) O << "-" << MAI->getPCSymbol();
-}
-
-void DwarfPrinter::EmitReference(const MCSymbol *Sym, bool IsPCRelative,
-                                 bool Force32Bit) const {
-  PrintRelDirective(Force32Bit);
-  O << *Sym;
-  if (IsPCRelative) O << "-" << MAI->getPCSymbol();
-}
-
-void DwarfPrinter::EmitReference(const char *Tag, unsigned Number,
-                                 unsigned Encoding) const {
-  SmallString<64> Name;
-  raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
-                            << Tag << Number;
-
-  MCSymbol *Sym = Asm->OutContext.GetOrCreateSymbol(Name.str());
-  EmitReference(Sym, Encoding);
-}
-
-void DwarfPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
-  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
-  PrintRelDirective(Encoding);
-  O << *TLOF.getSymbolForDwarfReference(Sym, Asm->MMI, Encoding);;
-}
-
-void DwarfPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const {
-  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
-  PrintRelDirective(Encoding);
-  O << *TLOF.getSymbolForDwarfGlobalReference(GV, Asm->Mang,
-                                              Asm->MMI, Encoding);;
-}
-
-/// EmitDifference - Emit the difference between two labels.  If this assembler
-/// supports .set, we emit a .set of a temporary and then use it in the .word.
-void DwarfPrinter::EmitDifference(const char *TagHi, unsigned NumberHi,
-                                  const char *TagLo, unsigned NumberLo,
-                                  bool IsSmall) {
-  if (MAI->hasSetDirective()) {
-    // FIXME: switch to OutStreamer.EmitAssignment.
-    O << "\t.set\t";
-    PrintLabelName("set", SetCounter, Flavor);
-    O << ",";
-    PrintLabelName(TagHi, NumberHi);
-    O << "-";
-    PrintLabelName(TagLo, NumberLo);
-    O << "\n";
-
-    PrintRelDirective(IsSmall);
-    PrintLabelName("set", SetCounter, Flavor);
-    ++SetCounter;
-  } else {
-    PrintRelDirective(IsSmall);
-    PrintLabelName(TagHi, NumberHi);
-    O << "-";
-    PrintLabelName(TagLo, NumberLo);
-  }
-}
-
-void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section,
-                                     unsigned LabelNumber,
-                                     unsigned SectionNumber,
-                                     bool IsSmall, bool isEH,
-                                     bool useSet) {
-  bool printAbsolute = false;
-  if (isEH)
-    printAbsolute = MAI->isAbsoluteEHSectionOffsets();
-  else
-    printAbsolute = MAI->isAbsoluteDebugSectionOffsets();
-
-  if (MAI->hasSetDirective() && useSet) {
-    // FIXME: switch to OutStreamer.EmitAssignment.
-    O << "\t.set\t";
-    PrintLabelName("set", SetCounter, Flavor);
-    O << ",";
-    PrintLabelName(Label, LabelNumber);
-
-    if (!printAbsolute) {
-      O << "-";
-      PrintLabelName(Section, SectionNumber);
-    }
-
-    O << "\n";
-    PrintRelDirective(IsSmall);
-    PrintLabelName("set", SetCounter, Flavor);
-    ++SetCounter;
-  } else {
-    PrintRelDirective(IsSmall, true);
-    PrintLabelName(Label, LabelNumber);
-
-    if (!printAbsolute) {
-      O << "-";
-      PrintLabelName(Section, SectionNumber);
-    }
-  }
-}
-
-/// EmitFrameMoves - Emit frame instructions to describe the layout of the
-/// frame.
-void DwarfPrinter::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
-                                  const std::vector<MachineMove> &Moves,
-                                  bool isEH) {
-  int stackGrowth =
-    Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
-    TargetFrameInfo::StackGrowsUp ?
-    TD->getPointerSize() : -TD->getPointerSize();
-  bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0;
-
-  for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
-    const MachineMove &Move = Moves[i];
-    unsigned LabelID = Move.getLabelID();
-
-    if (LabelID) {
-      LabelID = MMI->MappedLabel(LabelID);
-
-      // Throw out move if the label is invalid.
-      if (!LabelID) continue;
-    }
-
-    const MachineLocation &Dst = Move.getDestination();
-    const MachineLocation &Src = Move.getSource();
-
-    // Advance row if new location.
-    if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
-      EmitCFAByte(dwarf::DW_CFA_advance_loc4);
-      EmitDifference("label", LabelID, BaseLabel, BaseLabelID, true);
-      Asm->O << '\n';
-
-      BaseLabelID = LabelID;
-      BaseLabel = "label";
-      IsLocal = true;
-    }
-
-    // If advancing cfa.
-    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
-      if (!Src.isReg()) {
-        if (Src.getReg() == MachineLocation::VirtualFP) {
-          EmitCFAByte(dwarf::DW_CFA_def_cfa_offset);
-        } else {
-          EmitCFAByte(dwarf::DW_CFA_def_cfa);
-          EmitULEB128(RI->getDwarfRegNum(Src.getReg(), isEH), "Register");
-        }
-
-        int Offset = -Src.getOffset();
-        EmitULEB128(Offset, "Offset");
-      } else {
-        llvm_unreachable("Machine move not supported yet.");
-      }
-    } else if (Src.isReg() &&
-               Src.getReg() == MachineLocation::VirtualFP) {
-      if (Dst.isReg()) {
-        EmitCFAByte(dwarf::DW_CFA_def_cfa_register);
-        EmitULEB128(RI->getDwarfRegNum(Dst.getReg(), isEH), "Register");
-      } else {
-        llvm_unreachable("Machine move not supported yet.");
-      }
-    } else {
-      unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
-      int Offset = Dst.getOffset() / stackGrowth;
-
-      if (Offset < 0) {
-        EmitCFAByte(dwarf::DW_CFA_offset_extended_sf);
-        EmitULEB128(Reg, "Reg");
-        EmitSLEB128(Offset, "Offset");
-      } else if (Reg < 64) {
-        EmitCFAByte(dwarf::DW_CFA_offset + Reg);
-        EmitULEB128(Offset, "Offset");
-      } else {
-        EmitCFAByte(dwarf::DW_CFA_offset_extended);
-        EmitULEB128(Reg, "Reg");
-        EmitULEB128(Offset, "Offset");
-      }
-    }
-  }
-}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
deleted file mode 100644
index bd715f2..0000000
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h
+++ /dev/null
@@ -1,177 +0,0 @@
-//===--- lib/CodeGen/DwarfPrinter.h - Dwarf Printer -------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Emit general DWARF directives.
-// 
-//===----------------------------------------------------------------------===//
-
-#ifndef CODEGEN_ASMPRINTER_DWARFPRINTER_H__
-#define CODEGEN_ASMPRINTER_DWARFPRINTER_H__
-
-#include "DwarfLabel.h"
-#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/FormattedStream.h"
-#include <vector>
-
-namespace llvm {
-class AsmPrinter;
-class MachineFunction;
-class MachineModuleInfo;
-class Module;
-class MCAsmInfo;
-class TargetData;
-class TargetRegisterInfo;
-class GlobalValue;
-class MCSymbol;
-class Twine;
-
-class DwarfPrinter {
-protected:
-  ~DwarfPrinter() {}
-
-  //===-------------------------------------------------------------==---===//
-  // Core attributes used by the DWARF printer.
-  //
-
-  /// O - Stream to .s file.
-  raw_ostream &O;
-
-  /// Asm - Target of Dwarf emission.
-  AsmPrinter *Asm;
-
-  /// MAI - Target asm information.
-  const MCAsmInfo *MAI;
-
-  /// TD - Target data.
-  const TargetData *TD;
-
-  /// RI - Register Information.
-  const TargetRegisterInfo *RI;
-
-  /// M - Current module.
-  Module *M;
-
-  /// MF - Current machine function.
-  const MachineFunction *MF;
-
-  /// MMI - Collected machine module information.
-  MachineModuleInfo *MMI;
-
-  /// SubprogramCount - The running count of functions being compiled.
-  unsigned SubprogramCount;
-
-  /// Flavor - A unique string indicating what dwarf producer this is, used to
-  /// unique labels.
-  const char * const Flavor;
-
-  /// SetCounter - A unique number for each '.set' directive.
-  unsigned SetCounter;
-
-  DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T,
-               const char *flavor);
-public:
-  
-  //===------------------------------------------------------------------===//
-  // Accessors.
-  //
-  const AsmPrinter *getAsm() const { return Asm; }
-  MachineModuleInfo *getMMI() const { return MMI; }
-  const MCAsmInfo *getMCAsmInfo() const { return MAI; }
-  const TargetData *getTargetData() const { return TD; }
-
-  /// SizeOfEncodedValue - Return the size of the encoding in bytes.
-  unsigned SizeOfEncodedValue(unsigned Encoding) const;
-
-  void PrintRelDirective(unsigned Encoding) const;
-  void PrintRelDirective(bool Force32Bit = false,
-                         bool isInSection = false) const;
-
-  /// EOL - Print a newline character to asm stream.  If a comment is present
-  /// then it will be printed first.  Comments should not contain '\n'.
-  void EOL(const Twine &Comment) const;
-  
-  /// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
-  /// encoding.  If verbose assembly output is enabled, we output comments
-  /// describing the encoding.  Desc is a string saying what the encoding is
-  /// specifying (e.g. "LSDA").
-  void EmitEncodingByte(unsigned Val, const char *Desc);
-  
-  /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
-  void EmitCFAByte(unsigned Val);
-  
-  
-  /// EmitSLEB128 - emit the specified signed leb128 value.
-  void EmitSLEB128(int Value, const char *Desc) const;
-
-  /// EmitULEB128 - emit the specified unsigned leb128 value.
-  void EmitULEB128(unsigned Value, const char *Desc = 0,
-                   unsigned PadTo = 0) const;
-
-  
-  /// PrintLabelName - Print label name in form used by Dwarf writer.
-  ///
-  void PrintLabelName(const DWLabel &Label) const {
-    PrintLabelName(Label.getTag(), Label.getNumber());
-  }
-  void PrintLabelName(const char *Tag, unsigned Number) const;
-  void PrintLabelName(const char *Tag, unsigned Number,
-                      const char *Suffix) const;
-
-  /// EmitLabel - Emit location label for internal use by Dwarf.
-  ///
-  void EmitLabel(const DWLabel &Label) const {
-    EmitLabel(Label.getTag(), Label.getNumber());
-  }
-  void EmitLabel(const char *Tag, unsigned Number) const;
-
-  /// EmitReference - Emit a reference to a label.
-  ///
-  void EmitReference(const DWLabel &Label, bool IsPCRelative = false,
-                     bool Force32Bit = false) const {
-    EmitReference(Label.getTag(), Label.getNumber(),
-                  IsPCRelative, Force32Bit);
-  }
-  void EmitReference(const char *Tag, unsigned Number,
-                     bool IsPCRelative = false,
-                     bool Force32Bit = false) const;
-  void EmitReference(const std::string &Name, bool IsPCRelative = false,
-                     bool Force32Bit = false) const;
-  void EmitReference(const MCSymbol *Sym, bool IsPCRelative = false,
-                     bool Force32Bit = false) const;
-
-  void EmitReference(const char *Tag, unsigned Number, unsigned Encoding) const;
-  void EmitReference(const MCSymbol *Sym, unsigned Encoding) const;
-  void EmitReference(const GlobalValue *GV, unsigned Encoding) const;
-
-  /// EmitDifference - Emit the difference between two labels.
-  void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo,
-                      bool IsSmall = false) {
-    EmitDifference(LabelHi.getTag(), LabelHi.getNumber(),
-                   LabelLo.getTag(), LabelLo.getNumber(),
-                   IsSmall);
-  }
-  void EmitDifference(const char *TagHi, unsigned NumberHi,
-                      const char *TagLo, unsigned NumberLo,
-                      bool IsSmall = false);
-
-  void EmitSectionOffset(const char* Label, const char* Section,
-                         unsigned LabelNumber, unsigned SectionNumber,
-                         bool IsSmall = false, bool isEH = false,
-                         bool useSet = true);
-
-  /// EmitFrameMoves - Emit frame instructions to describe the layout of the
-  /// frame.
-  void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
-                      const std::vector<MachineMove> &Moves, bool isEH);
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
deleted file mode 100644
index 08e1bbc..0000000
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for writing dwarf info into asm files.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/DwarfWriter.h"
-#include "DwarfDebug.h"
-#include "DwarfException.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-
-using namespace llvm;
-
-static RegisterPass<DwarfWriter>
-X("dwarfwriter", "DWARF Information Writer");
-char DwarfWriter::ID = 0;
-
-//===----------------------------------------------------------------------===//
-/// DwarfWriter Implementation
-///
-
-DwarfWriter::DwarfWriter()
-  : ImmutablePass(&ID), DD(0), DE(0) {}
-
-DwarfWriter::~DwarfWriter() {
-  delete DE;
-  delete DD;
-}
-
-/// BeginModule - Emit all Dwarf sections that should come prior to the
-/// content.
-void DwarfWriter::BeginModule(Module *M,
-                              MachineModuleInfo *MMI,
-                              raw_ostream &OS, AsmPrinter *A,
-                              const MCAsmInfo *T) {
-  DE = new DwarfException(OS, A, T);
-  DD = new DwarfDebug(OS, A, T);
-  DE->BeginModule(M, MMI);
-  DD->beginModule(M, MMI);
-}
-
-/// EndModule - Emit all Dwarf sections that should come after the content.
-///
-void DwarfWriter::EndModule() {
-  DE->EndModule();
-  DD->endModule();
-  delete DD; DD = 0;
-  delete DE; DE = 0;
-}
-
-/// BeginFunction - Gather pre-function debug information.  Assumes being
-/// emitted immediately after the function entry point.
-void DwarfWriter::BeginFunction(const MachineFunction *MF) {
-  DE->BeginFunction(MF);
-  DD->beginFunction(MF);
-}
-
-/// EndFunction - Gather and emit post-function debug information.
-///
-void DwarfWriter::EndFunction(const MachineFunction *MF) {
-  DD->endFunction(MF);
-  DE->EndFunction();
-
-  if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI())
-    // Clear function debug information.
-    MMI->EndFunction();
-}
-
-/// RecordSourceLine - Records location information and associates it with a 
-/// label. Returns a unique label ID used to generate a label and provide
-/// correspondence to the source line list.
-unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, 
-                                       MDNode *Scope) {
-  return DD->recordSourceLine(Line, Col, Scope);
-}
-
-/// getRecordSourceLineCount - Count source lines.
-unsigned DwarfWriter::getRecordSourceLineCount() {
-  return DD->getSourceLineCount();
-}
-
-/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
-/// be emitted.
-bool DwarfWriter::ShouldEmitDwarfDebug() const {
-  return DD && DD->ShouldEmitDwarfDebug();
-}
-
-void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) {
-  DD->beginScope(MI, L);
-}
-void DwarfWriter::EndScope(const MachineInstr *MI) {
-  DD->endScope(MI);
-}
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index a9502fd..38bc584 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -15,11 +15,15 @@
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/Module.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include <ctype.h>
@@ -29,11 +33,8 @@
 
   class OcamlGCMetadataPrinter : public GCMetadataPrinter {
   public:
-    void beginAssembly(raw_ostream &OS, AsmPrinter &AP,
-                       const MCAsmInfo &MAI);
-
-    void finishAssembly(raw_ostream &OS, AsmPrinter &AP,
-                        const MCAsmInfo &MAI);
+    void beginAssembly(AsmPrinter &AP);
+    void finishAssembly(AsmPrinter &AP);
   };
 
 }
@@ -43,33 +44,34 @@
 
 void llvm::linkOcamlGCPrinter() { }
 
-static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP,
-                           const MCAsmInfo &MAI, const char *Id) {
+static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
   const std::string &MId = M.getModuleIdentifier();
 
-  std::string Mangled;
-  Mangled += MAI.getGlobalPrefix();
-  Mangled += "caml";
-  size_t Letter = Mangled.size();
-  Mangled.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
-  Mangled += "__";
-  Mangled += Id;
-
+  std::string SymName;
+  SymName += "caml";
+  size_t Letter = SymName.size();
+  SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+  SymName += "__";
+  SymName += Id;
+  
   // Capitalize the first letter of the module name.
-  Mangled[Letter] = toupper(Mangled[Letter]);
+  SymName[Letter] = toupper(SymName[Letter]);
+  
+  SmallString<128> TmpStr;
+  AP.Mang->getNameWithPrefix(TmpStr, SymName);
+  
+  MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
 
-  if (const char *GlobalDirective = MAI.getGlobalDirective())
-    OS << GlobalDirective << Mangled << "\n";
-  OS << Mangled << ":\n";
+  AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+  AP.OutStreamer.EmitLabel(Sym);
 }
 
-void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
-                                           const MCAsmInfo &MAI) {
+void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
   AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
-  EmitCamlGlobal(getModule(), OS, AP, MAI, "code_begin");
+  EmitCamlGlobal(getModule(), AP, "code_begin");
 
   AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
-  EmitCamlGlobal(getModule(), OS, AP, MAI, "data_begin");
+  EmitCamlGlobal(getModule(), AP, "data_begin");
 }
 
 /// emitAssembly - Print the frametable. The ocaml frametable format is thus:
@@ -88,62 +90,48 @@
 /// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if
 /// either condition is detected in a function which uses the GC.
 ///
-void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
-                                            const MCAsmInfo &MAI) {
-  const char *AddressDirective;
-  int AddressAlignLog;
-  if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
-    AddressDirective = MAI.getData32bitsDirective();
-    AddressAlignLog = 2;
-  } else {
-    AddressDirective = MAI.getData64bitsDirective();
-    AddressAlignLog = 3;
-  }
+void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+  unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize();
 
   AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
-  EmitCamlGlobal(getModule(), OS, AP, MAI, "code_end");
+  EmitCamlGlobal(getModule(), AP, "code_end");
 
   AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
-  EmitCamlGlobal(getModule(), OS, AP, MAI, "data_end");
+  EmitCamlGlobal(getModule(), AP, "data_end");
 
-  OS << AddressDirective << 0 << '\n'; // FIXME: Why does ocaml emit this??
+  // FIXME: Why does ocaml emit this??
+  AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0);
 
   AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
-  EmitCamlGlobal(getModule(), OS, AP, MAI, "frametable");
+  EmitCamlGlobal(getModule(), AP, "frametable");
 
   for (iterator I = begin(), IE = end(); I != IE; ++I) {
     GCFunctionInfo &FI = **I;
 
     uint64_t FrameSize = FI.getFrameSize();
     if (FrameSize >= 1<<16) {
-      std::string msg;
-      raw_string_ostream Msg(msg);
-      Msg << "Function '" << FI.getFunction().getName()
-           << "' is too large for the ocaml GC! "
-           << "Frame size " << FrameSize << " >= 65536.\n";
-      Msg << "(" << uintptr_t(&FI) << ")";
-      llvm_report_error(Msg.str()); // Very rude!
+      // Very rude!
+      report_fatal_error("Function '" + FI.getFunction().getName() +
+                         "' is too large for the ocaml GC! "
+                         "Frame size " + Twine(FrameSize) + ">= 65536.\n"
+                         "(" + Twine(uintptr_t(&FI)) + ")");
     }
 
-    OS << "\t" << MAI.getCommentString() << " live roots for "
-       << FI.getFunction().getName() << "\n";
+    AP.OutStreamer.AddComment("live roots for " +
+                              Twine(FI.getFunction().getName()));
+    AP.OutStreamer.AddBlankLine();
 
     for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
       size_t LiveCount = FI.live_size(J);
       if (LiveCount >= 1<<16) {
-        std::string msg;
-        raw_string_ostream Msg(msg);
-        Msg << "Function '" << FI.getFunction().getName()
-             << "' is too large for the ocaml GC! "
-             << "Live root count " << LiveCount << " >= 65536.";
-        llvm_report_error(Msg.str()); // Very rude!
+        // Very rude!
+        report_fatal_error("Function '" + FI.getFunction().getName() +
+                           "' is too large for the ocaml GC! "
+                           "Live root count "+Twine(LiveCount)+" >= 65536.");
       }
 
-      OS << AddressDirective
-        << MAI.getPrivateGlobalPrefix() << "label" << J->Num << '\n';
-
+      AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0);
       AP.EmitInt16(FrameSize);
-
       AP.EmitInt16(LiveCount);
 
       for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
@@ -155,7 +143,7 @@
         AP.EmitInt32(K->StackOffset);
       }
 
-      AP.EmitAlignment(AddressAlignLog);
+      AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
     }
   }
 }
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index d94729a..8f51940 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -105,17 +105,6 @@
   while (!MBB->succ_empty())
     MBB->removeSuccessor(MBB->succ_end()-1);
 
-  // If there are any labels in the basic block, unregister them from
-  // MachineModuleInfo.
-  if (MMI && !MBB->empty()) {
-    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-         I != E; ++I) {
-      if (I->isLabel())
-        // The label ID # is always operand #0, an immediate.
-        MMI->InvalidateLabel(I->getOperand(0).getImm());
-    }
-  }
-
   // Remove the block.
   MF->erase(MBB);
 }
@@ -203,7 +192,7 @@
     MadeChange |= MadeChangeThisIteration;
   }
 
-  // See if any jump tables have become mergable or dead as the code generator
+  // See if any jump tables have become dead as the code generator
   // did its thing.
   MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
   if (JTI == 0) {
@@ -211,27 +200,8 @@
     return MadeChange;
   }
   
-  const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables();
-  // Figure out how these jump tables should be merged.
-  std::vector<unsigned> JTMapping;
-  JTMapping.reserve(JTs.size());
-
-  // We always keep the 0th jump table.
-  JTMapping.push_back(0);
-
-  // Scan the jump tables, seeing if there are any duplicates.  Note that this
-  // is N^2, which should be fixed someday.
-  for (unsigned i = 1, e = JTs.size(); i != e; ++i) {
-    if (JTs[i].MBBs.empty())
-      JTMapping.push_back(i);
-    else
-      JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
-  }
-
-  // If a jump table was merge with another one, walk the function rewriting
-  // references to jump tables to reference the new JT ID's.  Keep track of
-  // whether we see a jump table idx, if not, we can delete the JT.
-  BitVector JTIsLive(JTs.size());
+  // Walk the function to find jump tables that are live.
+  BitVector JTIsLive(JTI->getJumpTables().size());
   for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
        BB != E; ++BB) {
     for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
@@ -239,17 +209,14 @@
       for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
         MachineOperand &Op = I->getOperand(op);
         if (!Op.isJTI()) continue;
-        unsigned NewIdx = JTMapping[Op.getIndex()];
-        Op.setIndex(NewIdx);
 
         // Remember that this JT is live.
-        JTIsLive.set(NewIdx);
+        JTIsLive.set(Op.getIndex());
       }
   }
 
-  // Finally, remove dead jump tables.  This happens either because the
-  // indirect jump was unreachable (and thus deleted) or because the jump
-  // table was merged with some other one.
+  // Finally, remove dead jump tables.  This happens when the
+  // indirect jump was unreachable (and thus deleted).
   for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
     if (!JTIsLive.test(i)) {
       JTI->RemoveJumpTable(i);
@@ -310,12 +277,23 @@
     return 0;   // Empty MBB.
 
   --I;
+  // Skip debug info so it will not affect codegen.
+  while (I->isDebugValue()) {
+    if (I==MBB->begin())
+      return 0;      // MBB empty except for debug info.
+    --I;
+  }
   unsigned Hash = HashMachineInstr(I);
 
   if (I == MBB->begin() || minCommonTailLength == 1)
     return Hash;   // Single instr MBB.
 
   --I;
+  while (I->isDebugValue()) {
+    if (I==MBB->begin())
+      return Hash;      // MBB with single non-debug instr.
+    --I;
+  }
   // Hash in the second-to-last instruction.
   Hash ^= HashMachineInstr(I) << 2;
   return Hash;
@@ -334,9 +312,32 @@
   unsigned TailLen = 0;
   while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
     --I1; --I2;
-    // Don't merge debugging pseudos.
-    if (I1->isDebugValue() || I2->isDebugValue() ||
-        !I1->isIdenticalTo(I2) ||
+    // Skip debugging pseudos; necessary to avoid changing the code.
+    while (I1->isDebugValue()) {
+      if (I1==MBB1->begin()) {
+        while (I2->isDebugValue()) {
+          if (I2==MBB2->begin())
+            // I1==DBG at begin; I2==DBG at begin
+            return TailLen;
+          --I2;
+        }
+        ++I2;
+        // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
+        return TailLen;
+      }
+      --I1;
+    }
+    // I1==first (untested) non-DBG preceding known match
+    while (I2->isDebugValue()) {
+      if (I2==MBB2->begin()) {
+        ++I1;
+        // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
+        return TailLen;
+      }
+      --I2;
+    }
+    // I1, I2==first (untested) non-DBGs preceding known match
+    if (!I1->isIdenticalTo(I2) ||
         // FIXME: This check is dubious. It's used to get around a problem where
         // people incorrectly expect inline asm directives to remain in the same
         // relative order. This is untenable because normal compiler
@@ -348,6 +349,29 @@
     }
     ++TailLen;
   }
+  // Back past possible debugging pseudos at beginning of block.  This matters
+  // when one block differs from the other only by whether debugging pseudos
+  // are present at the beginning.  (This way, the various checks later for
+  // I1==MBB1->begin() work as expected.)
+  if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
+    --I2;
+    while (I2->isDebugValue()) {
+      if (I2 == MBB2->begin()) {
+        return TailLen;
+        }
+      --I2;
+    }
+    ++I2;
+  }
+  if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
+    --I1;
+    while (I1->isDebugValue()) {
+      if (I1 == MBB1->begin())
+        return TailLen;
+      --I1;
+    }
+    ++I1;
+  }
   return TailLen;
 }
 
@@ -643,6 +667,8 @@
     SameTails[commonTailIndex].getTailStartPos();
   MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
 
+  // If the common tail includes any debug info we will take it pretty
+  // randomly from one of the inputs.  Might be better to remove it?
   DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
                << maxCommonTailLength);
 
@@ -912,6 +938,29 @@
   return MadeChange;
 }
 
+// Blocks should be considered empty if they contain only debug info;
+// else the debug info would affect codegen.
+static bool IsEmptyBlock(MachineBasicBlock *MBB) {
+  if (MBB->empty())
+    return true;
+  for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+       MBBI!=MBBE; ++MBBI) {
+    if (!MBBI->isDebugValue())
+      return false;
+  }
+  return true;
+}
+
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+  MachineBasicBlock::iterator MBBI, MBBE;
+  for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) {
+    if (!MBBI->isDebugValue())
+      break;
+  }
+  return (MBBI->getDesc().isBranch());
+}
 
 /// IsBetterFallthrough - Return true if it would be clearly better to
 /// fall-through to MBB1 than to fall through into MBB2.  This has to return
@@ -923,15 +972,21 @@
   // MBB1 doesn't, we prefer to fall through into MBB1.  This allows us to
   // optimize branches that branch to either a return block or an assert block
   // into a fallthrough to the return.
-  if (MBB1->empty() || MBB2->empty()) return false;
+  if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
 
   // If there is a clear successor ordering we make sure that one block
   // will fall through to the next
   if (MBB1->isSuccessor(MBB2)) return true;
   if (MBB2->isSuccessor(MBB1)) return false;
 
-  MachineInstr *MBB1I = --MBB1->end();
-  MachineInstr *MBB2I = --MBB2->end();
+  // Neither block consists entirely of debug info (per IsEmptyBlock check),
+  // so we needn't test for falling off the beginning here.
+  MachineBasicBlock::iterator MBB1I = --MBB1->end();
+  while (MBB1I->isDebugValue())
+    --MBB1I;
+  MachineBasicBlock::iterator MBB2I = --MBB2->end();
+  while (MBB2I->isDebugValue())
+    --MBB2I;
   return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
 }
 
@@ -949,7 +1004,7 @@
   // explicitly.  Landing pads should not do this since the landing-pad table
   // points to this block.  Blocks with their addresses taken shouldn't be
   // optimized away.
-  if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
+  if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
     // Dead block?  Leave for cleanup later.
     if (MBB->pred_empty()) return MadeChange;
 
@@ -1072,22 +1127,6 @@
           !IsBetterFallthrough(PriorTBB, MBB))
         DoTransform = false;
 
-      // We don't want to do this transformation if we have control flow like:
-      //   br cond BB2
-      // BB1:
-      //   ..
-      //   jmp BBX
-      // BB2:
-      //   ..
-      //   ret
-      //
-      // In this case, we could actually be moving the return block *into* a
-      // loop!
-      if (DoTransform && !MBB->succ_empty() &&
-          (!PriorTBB->canFallThrough() || PriorTBB->empty()))
-        DoTransform = false;
-
-
       if (DoTransform) {
         // Reverse the branch so we will fall through on the previous true cond.
         SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
@@ -1135,13 +1174,29 @@
     // If this branch is the only thing in its block, see if we can forward
     // other blocks across it.
     if (CurTBB && CurCond.empty() && CurFBB == 0 &&
-        MBB->begin()->getDesc().isBranch() && CurTBB != MBB &&
+        IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
         !MBB->hasAddressTaken()) {
       // This block may contain just an unconditional branch.  Because there can
       // be 'non-branch terminators' in the block, try removing the branch and
       // then seeing if the block is empty.
       TII->RemoveBranch(*MBB);
-
+      // If the only things remaining in the block are debug info, remove these
+      // as well, so this will behave the same as an empty block in non-debug
+      // mode.
+      if (!MBB->empty()) {
+        bool NonDebugInfoFound = false;
+        for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+             I != E; ++I) {
+          if (!I->isDebugValue()) {
+            NonDebugInfoFound = true;
+            break;
+          }
+        }
+        if (!NonDebugInfoFound)
+          // Make the block empty, losing the debug info (we could probably
+          // improve this in some cases.)
+          MBB->erase(MBB->begin(), MBB->end());
+      }
       // If this block is just an unconditional branch to CurTBB, we can
       // usually completely eliminate the block.  The only case we cannot
       // completely eliminate the block is when the block before this one
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index d385b86..3e38872 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_library(LLVMCodeGen
+  Analysis.cpp
   AggressiveAntiDepBreaker.cpp
   BranchFolding.cpp
   CalcSpillWeights.cpp
@@ -27,6 +28,7 @@
   MachineFunction.cpp
   MachineFunctionAnalysis.cpp
   MachineFunctionPass.cpp
+  MachineFunctionPrinterPass.cpp
   MachineInstr.cpp
   MachineLICM.cpp
   MachineLoopInfo.cpp
@@ -48,6 +50,7 @@
   ProcessImplicitDefs.cpp
   PrologEpilogInserter.cpp
   PseudoSourceValue.cpp
+  RegAllocFast.cpp
   RegAllocLinearScan.cpp
   RegAllocLocal.cpp
   RegAllocPBQP.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 7d3de89..759fbaa 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -143,13 +143,13 @@
 
 /// CriticalPathStep - Return the next SUnit after SU on the bottom-up
 /// critical path.
-static SDep *CriticalPathStep(SUnit *SU) {
-  SDep *Next = 0;
+static const SDep *CriticalPathStep(const SUnit *SU) {
+  const SDep *Next = 0;
   unsigned NextDepth = 0;
   // Find the predecessor edge with the greatest depth.
-  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+  for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
        P != PE; ++P) {
-    SUnit *PredSU = P->getSUnit();
+    const SUnit *PredSU = P->getSUnit();
     unsigned PredLatency = P->getLatency();
     unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
     // In the case of a latency tie, prefer an anti-dependency edge over
@@ -326,18 +326,18 @@
 }
 
 unsigned CriticalAntiDepBreaker::
-BreakAntiDependencies(std::vector<SUnit>& SUnits,
-                      MachineBasicBlock::iterator& Begin,
-                      MachineBasicBlock::iterator& End,
+BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                      MachineBasicBlock::iterator Begin,
+                      MachineBasicBlock::iterator End,
                       unsigned InsertPosIndex) {
   // The code below assumes that there is at least one instruction,
   // so just duck out immediately if the block is empty.
   if (SUnits.empty()) return 0;
 
   // Find the node at the bottom of the critical path.
-  SUnit *Max = 0;
+  const SUnit *Max = 0;
   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
-    SUnit *SU = &SUnits[i];
+    const SUnit *SU = &SUnits[i];
     if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
       Max = SU;
   }
@@ -357,7 +357,7 @@
 
   // Track progress along the critical path through the SUnit graph as we walk
   // the instructions.
-  SUnit *CriticalPathSU = Max;
+  const SUnit *CriticalPathSU = Max;
   MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
 
   // Consider this pattern:
@@ -429,8 +429,8 @@
     // the anti-dependencies in an instruction in order to be effective.
     unsigned AntiDepReg = 0;
     if (MI == CriticalPathMI) {
-      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
-        SUnit *NextSU = Edge->getSUnit();
+      if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+        const SUnit *NextSU = Edge->getSUnit();
 
         // Only consider anti-dependence edges.
         if (Edge->getKind() == SDep::Anti) {
@@ -452,7 +452,7 @@
             // Also, if there are dependencies on other SUnits with the
             // same register as the anti-dependency, don't attempt to
             // break it.
-            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
+            for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
                  PE = CriticalPathSU->Preds.end(); P != PE; ++P)
               if (P->getSUnit() == NextSU ?
                     (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 9e8db02..cc42dd2 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -72,9 +72,9 @@
     /// path
     /// of the ScheduleDAG and break them by renaming registers.
     ///
-    unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
-                                   MachineBasicBlock::iterator& Begin,
-                                   MachineBasicBlock::iterator& End,
+    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                   MachineBasicBlock::iterator Begin,
+                                   MachineBasicBlock::iterator End,
                                    unsigned InsertPosIndex);
 
     /// Observe - Update liveness information to account for the current
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 39fc85e..f6739f4 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -8,19 +8,19 @@
 //===----------------------------------------------------------------------===//
 //
 // This pass mulches exception handling code into a form adapted to code
-// generation.  Required if using dwarf exception handling.
+// generation. Required if using dwarf exception handling.
 //
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "dwarfehprepare"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -34,12 +34,22 @@
 
 namespace {
   class DwarfEHPrepare : public FunctionPass {
+    const TargetMachine *TM;
     const TargetLowering *TLI;
     bool CompileFast;
 
     // The eh.exception intrinsic.
     Function *ExceptionValueIntrinsic;
 
+    // The eh.selector intrinsic.
+    Function *SelectorIntrinsic;
+
+    // _Unwind_Resume_or_Rethrow call.
+    Constant *URoR;
+
+    // The EH language-specific catch-all type.
+    GlobalVariable *EHCatchAllValue;
+
     // _Unwind_Resume or the target equivalent.
     Constant *RewindFunction;
 
@@ -67,18 +77,89 @@
     Instruction *CreateValueLoad(BasicBlock *BB);
 
     /// CreateReadOfExceptionValue - Return the result of the eh.exception
-    /// intrinsic by calling the intrinsic if in a landing pad, or loading
-    /// it from the exception value variable otherwise.
+    /// intrinsic by calling the intrinsic if in a landing pad, or loading it
+    /// from the exception value variable otherwise.
     Instruction *CreateReadOfExceptionValue(BasicBlock *BB) {
       return LandingPads.count(BB) ?
         CreateExceptionValueCall(BB) : CreateValueLoad(BB);
     }
 
+    /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
+    /// use the ".llvm.eh.catch.all.value" call need to convert to using its
+    /// initializer instead.
+    bool CleanupSelectors();
+
+    /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+    void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+
+    /// FindAllURoRInvokes - Find all URoR invokes in the function.
+    void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
+
+    /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow"
+    /// calls. The "unwind" part of these invokes jump to a landing pad within
+    /// the current function. This is a candidate to merge the selector
+    /// associated with the URoR invoke with the one from the URoR's landing
+    /// pad.
+    bool HandleURoRInvokes();
+
+    /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
+    /// with the eh.exception call. This recursively looks past instructions
+    /// which don't change the EH pointer value, like casts or PHI nodes.
+    bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+                             SmallPtrSet<IntrinsicInst*, 8> &SelCalls);
+      
+    /// DoMem2RegPromotion - Take an alloca call and promote it from memory to a
+    /// register.
+    bool DoMem2RegPromotion(Value *V) {
+      AllocaInst *AI = dyn_cast<AllocaInst>(V);
+      if (!AI || !isAllocaPromotable(AI)) return false;
+
+      // Turn the alloca into a register.
+      std::vector<AllocaInst*> Allocas(1, AI);
+      PromoteMemToReg(Allocas, *DT, *DF);
+      return true;
+    }
+
+    /// PromoteStoreInst - Perform Mem2Reg on a StoreInst.
+    bool PromoteStoreInst(StoreInst *SI) {
+      if (!SI || !DT || !DF) return false;
+      if (DoMem2RegPromotion(SI->getOperand(1)))
+        return true;
+      return false;
+    }
+
+    /// PromoteEHPtrStore - Promote the storing of an EH pointer into a
+    /// register. This should get rid of the store and subsequent loads.
+    bool PromoteEHPtrStore(IntrinsicInst *II) {
+      if (!DT || !DF) return false;
+
+      bool Changed = false;
+      StoreInst *SI;
+
+      while (1) {
+        SI = 0;
+        for (Value::use_iterator
+               I = II->use_begin(), E = II->use_end(); I != E; ++I) {
+          SI = dyn_cast<StoreInst>(I);
+          if (SI) break;
+        }
+
+        if (!PromoteStoreInst(SI))
+          break;
+
+        Changed = true;
+      }
+
+      return false;
+    }
+
   public:
     static char ID; // Pass identification, replacement for typeid.
-    DwarfEHPrepare(const TargetLowering *tli, bool fast) :
-      FunctionPass(&ID), TLI(tli), CompileFast(fast),
-      ExceptionValueIntrinsic(0), RewindFunction(0) {}
+    DwarfEHPrepare(const TargetMachine *tm, bool fast) :
+      FunctionPass(&ID), TM(tm), TLI(TM->getTargetLowering()),
+      CompileFast(fast),
+      ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
+      URoR(0), EHCatchAllValue(0), RewindFunction(0) {}
 
     virtual bool runOnFunction(Function &Fn);
 
@@ -101,8 +182,235 @@
 
 char DwarfEHPrepare::ID = 0;
 
-FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) {
-  return new DwarfEHPrepare(tli, fast);
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) {
+  return new DwarfEHPrepare(tm, fast);
+}
+
+/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+void DwarfEHPrepare::
+FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
+  for (Value::use_iterator
+         I = SelectorIntrinsic->use_begin(),
+         E = SelectorIntrinsic->use_end(); I != E; ++I) {
+    IntrinsicInst *SI = cast<IntrinsicInst>(I);
+    if (!SI || SI->getParent()->getParent() != F) continue;
+
+    unsigned NumOps = SI->getNumOperands();
+    if (NumOps > 4) continue;
+    bool IsCleanUp = (NumOps == 3);
+
+    if (!IsCleanUp)
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getOperand(3)))
+        IsCleanUp = (CI->getZExtValue() == 0);
+
+    if (IsCleanUp)
+      Sels.insert(SI);
+  }
+}
+
+/// FindAllURoRInvokes - Find all URoR invokes in the function.
+void DwarfEHPrepare::
+FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
+  for (Value::use_iterator
+         I = URoR->use_begin(),
+         E = URoR->use_end(); I != E; ++I) {
+    if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+      URoRInvokes.insert(II);
+  }
+}
+
+/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
+/// the ".llvm.eh.catch.all.value" call need to convert to using its
+/// initializer instead.
+bool DwarfEHPrepare::CleanupSelectors() {
+  if (!EHCatchAllValue) return false;
+
+  if (!SelectorIntrinsic) {
+    SelectorIntrinsic =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+    if (!SelectorIntrinsic) return false;
+  }
+
+  bool Changed = false;
+  for (Value::use_iterator
+         I = SelectorIntrinsic->use_begin(),
+         E = SelectorIntrinsic->use_end(); I != E; ++I) {
+    IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(I);
+    if (!Sel || Sel->getParent()->getParent() != F) continue;
+
+    // Index of the ".llvm.eh.catch.all.value" variable.
+    unsigned OpIdx = Sel->getNumOperands() - 1;
+    GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getOperand(OpIdx));
+    if (GV != EHCatchAllValue) continue;
+    Sel->setOperand(OpIdx, EHCatchAllValue->getInitializer());
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// FindSelectorAndURoR - Find the eh.selector call associated with the
+/// eh.exception call. And indicate if there is a URoR "invoke" associated with
+/// the eh.exception call. This recursively looks past instructions which don't
+/// change the EH pointer value, like casts or PHI nodes.
+bool
+DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+                                    SmallPtrSet<IntrinsicInst*, 8> &SelCalls) {
+  SmallPtrSet<PHINode*, 32> SeenPHIs;
+  bool Changed = false;
+
+ restart:
+  for (Value::use_iterator
+         I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
+    Instruction *II = dyn_cast<Instruction>(I);
+    if (!II || II->getParent()->getParent() != F) continue;
+    
+    if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
+      if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
+        SelCalls.insert(Sel);
+    } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
+      if (Invoke->getCalledFunction() == URoR)
+        URoRInvoke = true;
+    } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
+      Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls);
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+      if (!PromoteStoreInst(SI)) continue;
+      Changed = true;
+      SeenPHIs.clear();
+      goto restart;             // Uses may have changed, restart loop.
+    } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
+      if (SeenPHIs.insert(PN))
+        // Don't process a PHI node more than once.
+        Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls);
+    }
+  }
+
+  return Changed;
+}
+
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The
+/// "unwind" part of these invokes jump to a landing pad within the current
+/// function. This is a candidate to merge the selector associated with the URoR
+/// invoke with the one from the URoR's landing pad.
+bool DwarfEHPrepare::HandleURoRInvokes() {
+  if (!DT) return CleanupSelectors(); // We require DominatorTree information.
+
+  if (!EHCatchAllValue) {
+    EHCatchAllValue =
+      F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value");
+    if (!EHCatchAllValue) return false;
+  }
+
+  if (!SelectorIntrinsic) {
+    SelectorIntrinsic =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+    if (!SelectorIntrinsic) return false;
+  }
+
+  if (!URoR) {
+    URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
+    if (!URoR) return CleanupSelectors();
+  }
+
+  SmallPtrSet<IntrinsicInst*, 32> Sels;
+  SmallPtrSet<InvokeInst*, 32> URoRInvokes;
+  FindAllCleanupSelectors(Sels);
+  FindAllURoRInvokes(URoRInvokes);
+
+  SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
+
+  for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+         SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
+    const BasicBlock *SelBB = (*SI)->getParent();
+    for (SmallPtrSet<InvokeInst*, 32>::iterator
+           UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
+      const BasicBlock *URoRBB = (*UI)->getParent();
+      if (SelBB == URoRBB || DT->dominates(SelBB, URoRBB)) {
+        SelsToConvert.insert(*SI);
+        break;
+      }
+    }
+  }
+
+  bool Changed = false;
+
+  if (Sels.size() != SelsToConvert.size()) {
+    // If we haven't been able to convert all of the clean-up selectors, then
+    // loop through the slow way to see if they still need to be converted.
+    if (!ExceptionValueIntrinsic) {
+      ExceptionValueIntrinsic =
+        Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
+      if (!ExceptionValueIntrinsic) return CleanupSelectors();
+    }
+
+    for (Value::use_iterator
+           I = ExceptionValueIntrinsic->use_begin(),
+           E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
+      IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(I);
+      if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
+
+      Changed |= PromoteEHPtrStore(EHPtr);
+
+      bool URoRInvoke = false;
+      SmallPtrSet<IntrinsicInst*, 8> SelCalls;
+      Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls);
+
+      if (URoRInvoke) {
+        // This EH pointer is being used by an invoke of an URoR instruction and
+        // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
+        // need to convert it to a 'catch-all'.
+        for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+               SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) {
+          IntrinsicInst *II = *SI;
+          unsigned NumOps = II->getNumOperands();
+
+          if (NumOps <= 4) {
+            bool IsCleanUp = (NumOps == 3);
+
+            if (!IsCleanUp)
+              if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(3)))
+                IsCleanUp = (CI->getZExtValue() == 0);
+
+            if (IsCleanUp)
+              SelsToConvert.insert(II);
+          }
+        }
+      }
+    }
+  }
+
+  if (!SelsToConvert.empty()) {
+    // Convert all clean-up eh.selectors, which are associated with "invokes" of
+    // URoR calls, into catch-all eh.selectors.
+    Changed = true;
+
+    for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+           SI = SelsToConvert.begin(), SE = SelsToConvert.end();
+         SI != SE; ++SI) {
+      IntrinsicInst *II = *SI;
+      SmallVector<Value*, 8> Args;
+
+      // Use the exception object pointer and the personality function
+      // from the original selector.
+      Args.push_back(II->getOperand(1)); // Exception object pointer.
+      Args.push_back(II->getOperand(2)); // Personality function.
+      Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
+
+      CallInst *NewSelector =
+        CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
+                         "eh.sel.catch.all", II);
+
+      NewSelector->setTailCall(II->isTailCall());
+      NewSelector->setAttributes(II->getAttributes());
+      NewSelector->setCallingConv(II->getCallingConv());
+
+      II->replaceAllUsesWith(NewSelector);
+      II->eraseFromParent();
+    }
+  }
+
+  Changed |= CleanupSelectors();
+  return Changed;
 }
 
 /// NormalizeLandingPads - Normalize and discover landing pads, noting them
@@ -115,7 +423,7 @@
 bool DwarfEHPrepare::NormalizeLandingPads() {
   bool Changed = false;
 
-  const MCAsmInfo *MAI = TLI->getTargetMachine().getMCAsmInfo();
+  const MCAsmInfo *MAI = TM->getMCAsmInfo();
   bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
 
   for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
@@ -355,7 +663,7 @@
 /// the start of the basic block (unless there already is one, in which case
 /// the existing call is returned).
 Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
-  Instruction *Start = BB->getFirstNonPHI();
+  Instruction *Start = BB->getFirstNonPHIOrDbg();
   // Is this a call to eh.exception?
   if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
     if (CI->getIntrinsicID() == Intrinsic::eh_exception)
@@ -375,7 +683,7 @@
 /// (creating it if necessary) at the start of the basic block (unless
 /// there already is a load, in which case the existing load is returned).
 Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) {
-  Instruction *Start = BB->getFirstNonPHI();
+  Instruction *Start = BB->getFirstNonPHIOrDbg();
   // Is this a load of the exception temporary?
   if (ExceptionValueVar)
     if (LoadInst* LI = dyn_cast<LoadInst>(Start))
@@ -422,6 +730,8 @@
   if (!CompileFast)
     Changed |= PromoteStackTemporaries();
 
+  Changed |= HandleURoRInvokes();
+
   LandingPads.clear();
 
   return Changed;
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
index b5e9c84..2ec1f6e 100644
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ b/lib/CodeGen/ELFCodeEmitter.h
@@ -57,13 +57,13 @@
     bool finishFunction(MachineFunction &F);
 
     /// emitLabel - Emits a label
-    virtual void emitLabel(uint64_t LabelID) {
+    virtual void emitLabel(MCSymbol *Label) {
       assert("emitLabel not implemented");
     }
 
     /// getLabelAddress - Return the address of the specified LabelID, 
     /// only usable after the LabelID has been emitted.
-    virtual uintptr_t getLabelAddress(uint64_t Label) const {
+    virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
       assert("getLabelAddress not implemented");
       return 0;
     }
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 0979c04..b644ebe 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -64,7 +64,7 @@
 
 ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
   : MachineFunctionPass(&ID), O(o), TM(tm),
-    OutContext(*new MCContext()),
+    OutContext(*new MCContext(*TM.getMCAsmInfo())),
     TLOF(TM.getTargetLowering()->getObjFileLowering()),
     is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
     isLittleEndian(TM.getTargetData()->isLittleEndian()),
@@ -109,7 +109,7 @@
   // Initialize TargetLoweringObjectFile.
   const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
   
-  Mang = new Mangler(*MAI);
+  Mang = new Mangler(OutContext, *TM.getTargetData());
 
   // ELF Header
   // ----------
@@ -208,7 +208,7 @@
 }
 
 // getTextSection - Get the text section for the specified function
-ELFSection &ELFWriter::getTextSection(Function *F) {
+ELFSection &ELFWriter::getTextSection(const Function *F) {
   const MCSectionELF *Text = 
     (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
   return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
@@ -507,7 +507,7 @@
   std::string msg;
   raw_string_ostream ErrorMsg(msg);
   ErrorMsg << "Constant unimp for type: " << *CV->getType();
-  llvm_report_error(ErrorMsg.str());
+  report_fatal_error(ErrorMsg.str());
 }
 
 // ResolveConstantExpr - Resolve the constant expression until it stop
@@ -572,10 +572,8 @@
   }
   }
 
-  std::string msg(CE->getOpcodeName());
-  raw_string_ostream ErrorMsg(msg);
-  ErrorMsg << ": Unsupported ConstantExpr type";
-  llvm_report_error(ErrorMsg.str());
+  report_fatal_error(CE->getOpcodeName() +
+                     StringRef(": Unsupported ConstantExpr type"));
 
   return std::make_pair(CV, 0); // silence warning
 }
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index b61b484..db66ecc 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -191,7 +191,7 @@
     ELFSection &getDtorSection();
     ELFSection &getJumpTableSection();
     ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
-    ELFSection &getTextSection(Function *F);
+    ELFSection &getTextSection(const Function *F);
     ELFSection &getRelocSection(ELFSection &S);
 
     // Helpers for obtaining ELF specific info.
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
index 61959bb..af5f289 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -29,7 +29,7 @@
   // Determine the maximum depth of any itinerary. This determines the
   // depth of the scoreboard. We always make the scoreboard at least 1
   // cycle deep to avoid dealing with the boundary condition.
-  ScoreboardDepth = 1;
+  unsigned ScoreboardDepth = 1;
   if (!ItinData.isEmpty()) {
     for (unsigned idx = 0; ; ++idx) {
       if (ItinData.isEndMarker(idx))
@@ -45,35 +45,27 @@
     }
   }
 
-  Scoreboard = new unsigned[ScoreboardDepth];
-  ScoreboardHead = 0;
+  ReservedScoreboard.reset(ScoreboardDepth);
+  RequiredScoreboard.reset(ScoreboardDepth);
 
   DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " 
                << ScoreboardDepth << '\n');
 }
 
-ExactHazardRecognizer::~ExactHazardRecognizer() {
-  delete [] Scoreboard;
-}
-
 void ExactHazardRecognizer::Reset() {
-  memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned));
-  ScoreboardHead = 0;
+  RequiredScoreboard.reset();
+  ReservedScoreboard.reset();
 }
 
-unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) {
-  return (ScoreboardHead + offset) % ScoreboardDepth;
-}
-
-void ExactHazardRecognizer::dumpScoreboard() {
+void ExactHazardRecognizer::ScoreBoard::dump() const {
   dbgs() << "Scoreboard:\n";
-  
-  unsigned last = ScoreboardDepth - 1;
-  while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0))
+
+  unsigned last = Depth - 1;
+  while ((last > 0) && ((*this)[last] == 0))
     last--;
 
   for (unsigned i = 0; i <= last; i++) {
-    unsigned FUs = Scoreboard[getFutureIndex(i)];
+    unsigned FUs = (*this)[i];
     dbgs() << "\t";
     for (int j = 31; j >= 0; j--)
       dbgs() << ((FUs & (1 << j)) ? '1' : '0');
@@ -96,11 +88,23 @@
     // stage is occupied. FIXME it would be more accurate to find the
     // same unit free in all the cycles.
     for (unsigned int i = 0; i < IS->getCycles(); ++i) {
-      assert(((cycle + i) < ScoreboardDepth) && 
+      assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
              "Scoreboard depth exceeded!");
-      
-      unsigned index = getFutureIndex(cycle + i);
-      unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
+
+      unsigned freeUnits = IS->getUnits();
+      switch (IS->getReservationKind()) {
+      default:
+       assert(0 && "Invalid FU reservation");
+      case InstrStage::Required:
+        // Required FUs conflict with both reserved and required ones
+        freeUnits &= ~ReservedScoreboard[cycle + i];
+        // FALLTHROUGH
+      case InstrStage::Reserved:
+        // Reserved FUs can conflict only with required ones.
+        freeUnits &= ~RequiredScoreboard[cycle + i];
+        break;
+      }
+
       if (!freeUnits) {
         DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
         DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
@@ -108,14 +112,14 @@
         return Hazard;
       }
     }
-    
+
     // Advance the cycle to the next stage.
     cycle += IS->getNextCycles();
   }
 
   return NoHazard;
 }
-    
+
 void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
   if (ItinData.isEmpty())
     return;
@@ -125,37 +129,52 @@
   // Use the itinerary for the underlying instruction to reserve FU's
   // in the scoreboard at the appropriate future cycles.
   unsigned idx = SU->getInstr()->getDesc().getSchedClass();
-  for (const InstrStage *IS = ItinData.beginStage(idx), 
+  for (const InstrStage *IS = ItinData.beginStage(idx),
          *E = ItinData.endStage(idx); IS != E; ++IS) {
     // We must reserve one of the stage's units for every cycle the
     // stage is occupied. FIXME it would be more accurate to reserve
     // the same unit free in all the cycles.
     for (unsigned int i = 0; i < IS->getCycles(); ++i) {
-      assert(((cycle + i) < ScoreboardDepth) &&
+      assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
              "Scoreboard depth exceeded!");
-      
-      unsigned index = getFutureIndex(cycle + i);
-      unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
-      
+
+      unsigned freeUnits = IS->getUnits();
+      switch (IS->getReservationKind()) {
+      default:
+       assert(0 && "Invalid FU reservation");
+      case InstrStage::Required:
+        // Required FUs conflict with both reserved and required ones
+        freeUnits &= ~ReservedScoreboard[cycle + i];
+        // FALLTHROUGH
+      case InstrStage::Reserved:
+        // Reserved FUs can conflict only with required ones.
+        freeUnits &= ~RequiredScoreboard[cycle + i];
+        break;
+      }
+
       // reduce to a single unit
       unsigned freeUnit = 0;
       do {
         freeUnit = freeUnits;
         freeUnits = freeUnit & (freeUnit - 1);
       } while (freeUnits);
-      
+
       assert(freeUnit && "No function unit available!");
-      Scoreboard[index] |= freeUnit;
+      if (IS->getReservationKind() == InstrStage::Required)
+        RequiredScoreboard[cycle + i] |= freeUnit;
+      else
+        ReservedScoreboard[cycle + i] |= freeUnit;
     }
-    
+
     // Advance the cycle to the next stage.
     cycle += IS->getNextCycles();
   }
-  
-  DEBUG(dumpScoreboard());
+
+  DEBUG(ReservedScoreboard.dump());
+  DEBUG(RequiredScoreboard.dump());
 }
-    
+
 void ExactHazardRecognizer::AdvanceCycle() {
-  Scoreboard[ScoreboardHead] = 0;
-  ScoreboardHead = getFutureIndex(1);
+  ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+  RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
 }
diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h
index 71ac979..91c81a9 100644
--- a/lib/CodeGen/ExactHazardRecognizer.h
+++ b/lib/CodeGen/ExactHazardRecognizer.h
@@ -22,35 +22,60 @@
 
 namespace llvm {
   class ExactHazardRecognizer : public ScheduleHazardRecognizer {
+    // ScoreBoard to track function unit usage. ScoreBoard[0] is a
+    // mask of the FUs in use in the cycle currently being
+    // schedule. ScoreBoard[1] is a mask for the next cycle. The
+    // ScoreBoard is used as a circular buffer with the current cycle
+    // indicated by Head.
+    class ScoreBoard {
+      unsigned *Data;
+
+      // The maximum number of cycles monitored by the Scoreboard. This
+      // value is determined based on the target itineraries to ensure
+      // that all hazards can be tracked.
+      size_t Depth;
+      // Indices into the Scoreboard that represent the current cycle.
+      size_t Head;
+    public:
+      ScoreBoard():Data(NULL), Depth(0), Head(0) { }
+      ~ScoreBoard() {
+        delete[] Data;
+      }
+
+      size_t getDepth() const { return Depth; }
+      unsigned& operator[](size_t idx) const {
+        assert(Depth && "ScoreBoard was not initialized properly!");
+
+        return Data[(Head + idx) % Depth];
+      }
+
+      void reset(size_t d = 1) {
+        if (Data == NULL) {
+          Depth = d;
+          Data = new unsigned[Depth];
+        }
+
+        memset(Data, 0, Depth * sizeof(Data[0]));
+        Head = 0;
+      }
+
+      void advance() {
+        Head = (Head + 1) % Depth;
+      }
+
+      // Print the scoreboard.
+      void dump() const;
+    };
+
     // Itinerary data for the target.
     const InstrItineraryData &ItinData;
 
-    // Scoreboard to track function unit usage. Scoreboard[0] is a
-    // mask of the FUs in use in the cycle currently being
-    // schedule. Scoreboard[1] is a mask for the next cycle. The
-    // Scoreboard is used as a circular buffer with the current cycle
-    // indicated by ScoreboardHead.
-    unsigned *Scoreboard;
-
-    // The maximum number of cycles monitored by the Scoreboard. This
-    // value is determined based on the target itineraries to ensure
-    // that all hazards can be tracked.
-    unsigned ScoreboardDepth;
-
-    // Indices into the Scoreboard that represent the current cycle.
-    unsigned ScoreboardHead;
-
-    // Return the scoreboard index to use for 'offset' cycles in the
-    // future. 'offset' of 0 returns ScoreboardHead.
-    unsigned getFutureIndex(unsigned offset);
-
-    // Print the scoreboard.
-    void dumpScoreboard();
+    ScoreBoard ReservedScoreboard;
+    ScoreBoard RequiredScoreboard;
 
   public:
     ExactHazardRecognizer(const InstrItineraryData &ItinData);
-    ~ExactHazardRecognizer();
-    
+
     virtual HazardType getHazardType(SUnit *SU);
     virtual void Reset();
     virtual void EmitInstruction(SUnit *SU);
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 055172b..ab0a800 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Pass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Function.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -150,30 +151,31 @@
 }
 
 bool Printer::runOnFunction(Function &F) {
-  if (!F.hasGC()) {
-    GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+  if (F.hasGC()) return false;
+  
+  GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+  
+  OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+  for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+                                      RE = FD->roots_end(); RI != RE; ++RI)
+    OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+  
+  OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+  for (GCFunctionInfo::iterator PI = FD->begin(),
+                                PE = FD->end(); PI != PE; ++PI) {
     
-    OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
-    for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
-                                        RE = FD->roots_end(); RI != RE; ++RI)
-      OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+    OS << "\t" << PI->Label->getName() << ": "
+       << DescKind(PI->Kind) << ", live = {";
     
-    OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
-    for (GCFunctionInfo::iterator PI = FD->begin(),
-                                  PE = FD->end(); PI != PE; ++PI) {
-      
-      OS << "\tlabel " << PI->Num << ": " << DescKind(PI->Kind) << ", live = {";
-      
-      for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
-                                         RE = FD->live_end(PI);;) {
-        OS << " " << RI->Num;
-        if (++RI == RE)
-          break;
-        OS << ",";
-      }
-      
-      OS << " }\n";
+    for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+                                       RE = FD->live_end(PI);;) {
+      OS << " " << RI->Num;
+      if (++RI == RE)
+        break;
+      OS << ",";
     }
+    
+    OS << " }\n";
   }
   
   return false;
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
index 9cd2925..f80e9ce 100644
--- a/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -12,19 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GCMetadataPrinter.h"
-
 using namespace llvm;
 
 GCMetadataPrinter::GCMetadataPrinter() { }
 
 GCMetadataPrinter::~GCMetadataPrinter() { }
 
-void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
-                                      const MCAsmInfo &MAI) {
+void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
   // Default is no action.
 }
 
-void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
-                                       const MCAsmInfo &MAI) {
+void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
   // Default is no action.
 }
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index b5006fd..790cb21 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -71,9 +71,9 @@
     
     void FindSafePoints(MachineFunction &MF);
     void VisitCallPoint(MachineBasicBlock::iterator MI);
-    unsigned InsertLabel(MachineBasicBlock &MBB, 
-                         MachineBasicBlock::iterator MI,
-                         DebugLoc DL) const;
+    MCSymbol *InsertLabel(MachineBasicBlock &MBB, 
+                          MachineBasicBlock::iterator MI,
+                          DebugLoc DL) const;
     
     void FindStackOffsets(MachineFunction &MF);
     
@@ -181,9 +181,10 @@
   
   for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
     if (!InitedRoots.count(*I)) {
-      new StoreInst(ConstantPointerNull::get(cast<PointerType>(
-                      cast<PointerType>((*I)->getType())->getElementType())),
-                    *I, IP);
+      StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+                        cast<PointerType>((*I)->getType())->getElementType())),
+                        *I);
+      SI->insertAfter(*I);
       MadeChange = true;
     }
   
@@ -329,14 +330,11 @@
   AU.addRequired<GCModuleInfo>();
 }
 
-unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, 
-                                     MachineBasicBlock::iterator MI,
-                                     DebugLoc DL) const {
-  unsigned Label = MMI->NextLabelID();
-  
-  BuildMI(MBB, MI, DL,
-          TII->get(TargetOpcode::GC_LABEL)).addImm(Label);
-  
+MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, 
+                                           MachineBasicBlock::iterator MI,
+                                           DebugLoc DL) const {
+  MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+  BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
   return Label;
 }
 
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 87ab7ef..e1c52f7 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -331,15 +331,15 @@
   IRBuilder<> Builder(CI->getParent(), CI);
   LLVMContext &Context = CI->getContext();
 
-  Function *Callee = CI->getCalledFunction();
+  const Function *Callee = CI->getCalledFunction();
   assert(Callee && "Cannot lower an indirect call!");
 
   switch (Callee->getIntrinsicID()) {
   case Intrinsic::not_intrinsic:
-    llvm_report_error("Cannot lower a call to a non-intrinsic function '"+
+    report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
                       Callee->getName() + "'!");
   default:
-    llvm_report_error("Code generator does not support intrinsic function '"+
+    report_fatal_error("Code generator does not support intrinsic function '"+
                       Callee->getName()+"'!");
 
     // The setjmp/longjmp intrinsics should only exist in the code if it was
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 5e88865..331dc7d 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -13,16 +13,15 @@
 
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/PassManager.h"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -51,6 +50,9 @@
     cl::desc("Disable Stack Slot Coloring"));
 static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
     cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+    cl::Hidden,
+    cl::desc("Disable Machine LICM"));
 static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
     cl::desc("Disable Machine Sinking"));
 static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
@@ -67,9 +69,6 @@
     cl::desc("Verify generated machine code"),
     cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
 
-static cl::opt<bool> EnableMachineCSE("enable-machine-cse", cl::Hidden,
-    cl::desc("Enable Machine CSE"));
-
 static cl::opt<cl::boolOrDefault>
 AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
            cl::init(cl::BOU_UNSET));
@@ -97,21 +96,19 @@
     cl::desc("Split GEPs and run no-load GVN"));
 
 LLVMTargetMachine::LLVMTargetMachine(const Target &T,
-                                     const std::string &TargetTriple)
-  : TargetMachine(T) {
+                                     const std::string &Triple)
+  : TargetMachine(T), TargetTriple(Triple) {
   AsmInfo = T.createAsmInfo(TargetTriple);
 }
 
 // Set the default code model for the JIT for a generic target.
 // FIXME: Is small right here? or .is64Bit() ? Large : Small?
-void
-LLVMTargetMachine::setCodeModelForJIT() {
+void LLVMTargetMachine::setCodeModelForJIT() {
   setCodeModel(CodeModel::Small);
 }
 
 // Set the default code model for static compilation for a generic target.
-void
-LLVMTargetMachine::setCodeModelForStatic() {
+void LLVMTargetMachine::setCodeModelForStatic() {
   setCodeModel(CodeModel::Small);
 }
 
@@ -121,65 +118,50 @@
                                             CodeGenOpt::Level OptLevel,
                                             bool DisableVerify) {
   // Add common CodeGen passes.
-  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify))
+  MCContext *Context = 0;
+  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
     return true;
+  assert(Context != 0 && "Failed to get MCContext");
 
-  OwningPtr<MCContext> Context(new MCContext());
+  const MCAsmInfo &MAI = *getMCAsmInfo();
   OwningPtr<MCStreamer> AsmStreamer;
 
-  formatted_raw_ostream *LegacyOutput;
   switch (FileType) {
   default: return true;
   case CGFT_AssemblyFile: {
-    const MCAsmInfo &MAI = *getMCAsmInfo();
     MCInstPrinter *InstPrinter =
-      getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, Out);
-    AsmStreamer.reset(createAsmStreamer(*Context, Out, MAI,
+      getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
+    AsmStreamer.reset(createAsmStreamer(*Context, Out,
                                         getTargetData()->isLittleEndian(),
                                         getVerboseAsm(), InstPrinter,
                                         /*codeemitter*/0));
-    // Set the AsmPrinter's "O" to the output file.
-    LegacyOutput = &Out;
     break;
   }
   case CGFT_ObjectFile: {
     // Create the code emitter for the target if it exists.  If not, .o file
     // emission fails.
     MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
-    if (MCE == 0)
+    TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
+    if (MCE == 0 || TAB == 0)
       return true;
     
-    AsmStreamer.reset(createMachOStreamer(*Context, Out, MCE));
-    
-    // Any output to the asmprinter's "O" stream is bad and needs to be fixed,
-    // force it to come out stderr.
-    // FIXME: this is horrible and leaks, eventually remove the raw_ostream from
-    // asmprinter.
-    LegacyOutput = new formatted_raw_ostream(errs());
+    AsmStreamer.reset(createMachOStreamer(*Context, *TAB, Out, MCE));
     break;
   }
   case CGFT_Null:
     // The Null output is intended for use for performance analysis and testing,
     // not real users.
     AsmStreamer.reset(createNullStreamer(*Context));
-    // Any output to the asmprinter's "O" stream is bad and needs to be fixed,
-    // force it to come out stderr.
-    // FIXME: this is horrible and leaks, eventually remove the raw_ostream from
-    // asmprinter.
-    LegacyOutput = new formatted_raw_ostream(errs());
     break;
   }
   
-  // Create the AsmPrinter, which takes ownership of Context and AsmStreamer
-  // if successful.
-  FunctionPass *Printer =
-    getTarget().createAsmPrinter(*LegacyOutput, *this, *Context, *AsmStreamer,
-                                 getMCAsmInfo());
+  // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+  FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
   if (Printer == 0)
     return true;
   
-  // If successful, createAsmPrinter took ownership of AsmStreamer and Context.
-  Context.take(); AsmStreamer.take();
+  // If successful, createAsmPrinter took ownership of AsmStreamer.
+  AsmStreamer.take();
   
   PM.add(Printer);
   
@@ -203,7 +185,8 @@
   setCodeModelForJIT();
   
   // Add common CodeGen passes.
-  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify))
+  MCContext *Ctx = 0;
+  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
     return true;
 
   addCodeEmitter(PM, OptLevel, JCE);
@@ -212,8 +195,7 @@
   return false; // success!
 }
 
-static void printNoVerify(PassManagerBase &PM,
-                           const char *Banner) {
+static void printNoVerify(PassManagerBase &PM, const char *Banner) {
   if (PrintMachineCode)
     PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
 }
@@ -233,7 +215,8 @@
 ///
 bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
                                                CodeGenOpt::Level OptLevel,
-                                               bool DisableVerify) {
+                                               bool DisableVerify,
+                                               MCContext *&OutContext) {
   // Standard LLVM-Level Passes.
 
   // Before running any passes, run the verifier to determine if the input
@@ -256,8 +239,7 @@
 
   // Turn exception handling constructs into something the code generators can
   // handle.
-  switch (getMCAsmInfo()->getExceptionHandlingType())
-  {
+  switch (getMCAsmInfo()->getExceptionHandlingType()) {
   case ExceptionHandling::SjLj:
     // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
     // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
@@ -266,10 +248,10 @@
     // pad is shared by multiple invokes and is also a target of a normal
     // edge from elsewhere.
     PM.add(createSjLjEHPass(getTargetLowering()));
-    PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+    PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None));
     break;
   case ExceptionHandling::Dwarf:
-    PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+    PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None));
     break;
   case ExceptionHandling::None:
     PM.add(createLowerInvokePass(getTargetLowering()));
@@ -297,6 +279,13 @@
     PM.add(createVerifierPass());
 
   // Standard Lower-Level Passes.
+  
+  // Install a MachineModuleInfo class, which is an immutable pass that holds
+  // all the per-module stuff we're generating, including MCContext.
+  MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo());
+  PM.add(MMI);
+  OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
+  
 
   // Set up a MachineFunction for the rest of CodeGen to work on.
   PM.add(new MachineFunctionAnalysis(*this, OptLevel));
@@ -328,11 +317,10 @@
     PM.add(createOptimizeExtsPass());
     if (!DisableMachineLICM)
       PM.add(createMachineLICMPass());
-    if (EnableMachineCSE)
-      PM.add(createMachineCSEPass());
+    PM.add(createMachineCSEPass());
     if (!DisableMachineSink)
       PM.add(createMachineSinkingPass());
-    printAndVerify(PM, "After MachineLICM and MachineSinking",
+    printAndVerify(PM, "After Machine LICM, CSE and Sinking passes",
                    /* allowDoubleDefs= */ true);
   }
 
@@ -352,12 +340,18 @@
   PM.add(createRegisterAllocator());
   printAndVerify(PM, "After Register Allocation");
 
-  // Perform stack slot coloring.
-  if (OptLevel != CodeGenOpt::None && !DisableSSC) {
+  // Perform stack slot coloring and post-ra machine LICM.
+  if (OptLevel != CodeGenOpt::None) {
     // FIXME: Re-enable coloring with register when it's capable of adding
     // kill markers.
-    PM.add(createStackSlotColoringPass(false));
-    printAndVerify(PM, "After StackSlotColoring");
+    if (!DisableSSC)
+      PM.add(createStackSlotColoringPass(false));
+
+    // Run post-ra machine LICM to hoist reloads / remats.
+    if (!DisablePostRAMachineLICM)
+      PM.add(createMachineLICMPass(false));
+
+    printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
   }
 
   // Run post-ra passes.
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index e207f60..025ad05 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -303,9 +303,7 @@
           // otherwise mark it as ~1U so it can be nuked later.
           if (ValNo->id == getNumValNums()-1) {
             do {
-              VNInfo *VNI = valnos.back();
               valnos.pop_back();
-              VNI->~VNInfo();
             } while (!valnos.empty() && valnos.back()->isUnused());
           } else {
             ValNo->setIsUnused(true);
@@ -351,9 +349,7 @@
   // otherwise mark it as ~1U so it can be nuked later.
   if (ValNo->id == getNumValNums()-1) {
     do {
-      VNInfo *VNI = valnos.back();
       valnos.pop_back();
-      VNI->~VNInfo();
     } while (!valnos.empty() && valnos.back()->isUnused());
   } else {
     ValNo->setIsUnused(true);
@@ -579,9 +575,7 @@
         // mark it as ~1U so it can be nuked later.
         if (V1->id == getNumValNums()-1) {
           do {
-            VNInfo *VNI = valnos.back();
             valnos.pop_back();
-            VNI->~VNInfo();
           } while (!valnos.empty() && valnos.back()->isUnused());
         } else {
           V1->setIsUnused(true);
@@ -597,7 +591,7 @@
 /// used with an unknown definition value.
 void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
                                         const LiveInterval &Clobbers,
-                                        BumpPtrAllocator &VNInfoAllocator) {
+                                        VNInfo::Allocator &VNInfoAllocator) {
   if (Clobbers.empty()) return;
   
   DenseMap<VNInfo*, VNInfo*> ValNoMaps;
@@ -658,14 +652,13 @@
   if (UnusedValNo) {
     // Delete the last unused val#.
     valnos.pop_back();
-    UnusedValNo->~VNInfo();
   }
 }
 
 void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
                                        SlotIndex Start,
                                        SlotIndex End,
-                                       BumpPtrAllocator &VNInfoAllocator) {
+                                       VNInfo::Allocator &VNInfoAllocator) {
   // Find a value # to use for the clobber ranges.  If there is already a value#
   // for unknown values, use it.
   VNInfo *ClobberValNo =
@@ -749,9 +742,7 @@
   // ~1U so it can be nuked later.
   if (V1->id == getNumValNums()-1) {
     do {
-      VNInfo *VNI = valnos.back();
       valnos.pop_back();
-      VNI->~VNInfo();
     } while (valnos.back()->isUnused());
   } else {
     V1->setIsUnused(true);
@@ -762,7 +753,7 @@
 
 void LiveInterval::Copy(const LiveInterval &RHS,
                         MachineRegisterInfo *MRI,
-                        BumpPtrAllocator &VNInfoAllocator) {
+                        VNInfo::Allocator &VNInfoAllocator) {
   ranges.clear();
   valnos.clear();
   std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index ccda66f..bd8a54c 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -91,7 +91,7 @@
   r2iMap_.clear();
 
   // Release VNInfo memroy regions after all VNInfo objects are dtor'd.
-  VNInfoAllocator.Reset();
+  VNInfoAllocator.DestroyAll();
   while (!CloneMIs.empty()) {
     MachineInstr *MI = CloneMIs.back();
     CloneMIs.pop_back();
@@ -141,7 +141,7 @@
     for (MachineBasicBlock::iterator mii = mbbi->begin(),
            mie = mbbi->end(); mii != mie; ++mii) {
       if (mii->isDebugValue())
-        OS << SlotIndex::getEmptyKey() << '\t' << *mii;
+        OS << "    \t" << *mii;
       else
         OS << getInstructionIndex(mii) << '\t' << *mii;
     }
@@ -218,9 +218,9 @@
   return false;
 }
 
-/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegRef except
-/// it can check use as well.
-bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
+/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except
+/// it checks for sub-register reference and it can check use as well.
+bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li,
                                             unsigned Reg, bool CheckUse,
                                   SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
   for (LiveInterval::Ranges::const_iterator
@@ -583,6 +583,16 @@
   // Look for kills, if it reaches a def before it's killed, then it shouldn't
   // be considered a livein.
   MachineBasicBlock::iterator mi = MBB->begin();
+  MachineBasicBlock::iterator E = MBB->end();
+  // Skip over DBG_VALUE at the start of the MBB.
+  if (mi != E && mi->isDebugValue()) {
+    while (++mi != E && mi->isDebugValue())
+      ;
+    if (mi == E)
+      // MBB is empty except for DBG_VALUE's.
+      return;
+  }
+
   SlotIndex baseIndex = MIIdx;
   SlotIndex start = baseIndex;
   if (getInstructionFromIndex(baseIndex) == 0)
@@ -591,15 +601,7 @@
   SlotIndex end = baseIndex;
   bool SeenDefUse = false;
 
-  MachineBasicBlock::iterator E = MBB->end();  
   while (mi != E) {
-    if (mi->isDebugValue()) {
-      ++mi;
-      if (mi != E && !mi->isDebugValue()) {
-        baseIndex = indexes_->getNextNonNullIndex(baseIndex);
-      }
-      continue;
-    }
     if (mi->killsRegister(interval.reg, tri_)) {
       DEBUG(dbgs() << " killed");
       end = baseIndex.getDefIndex();
@@ -616,10 +618,11 @@
       break;
     }
 
-    ++mi;
-    if (mi != E && !mi->isDebugValue()) {
+    while (++mi != E && mi->isDebugValue())
+      // Skip over DBG_VALUE.
+      ;
+    if (mi != E)
       baseIndex = indexes_->getNextNonNullIndex(baseIndex);
-    }
   }
 
   // Live-in register might not be used at all.
@@ -665,7 +668,7 @@
     DEBUG(dbgs() << MBB->getName() << ":\n");
 
     // Create intervals for live-ins to this BB first.
-    for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(),
+    for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
            LE = MBB->livein_end(); LI != LE; ++LI) {
       handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
       // Multiple live-ins can alias the same register.
@@ -816,8 +819,9 @@
   unsigned ImpUse = getReMatImplicitUse(li, MI);
   if (ImpUse) {
     const LiveInterval &ImpLi = getInterval(ImpUse);
-    for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg),
-           re = mri_->use_end(); ri != re; ++ri) {
+    for (MachineRegisterInfo::use_nodbg_iterator
+           ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end();
+         ri != re; ++ri) {
       MachineInstr *UseMI = &*ri;
       SlotIndex UseIdx = getInstructionIndex(UseMI);
       if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
@@ -1049,7 +1053,7 @@
       // all of its uses are rematerialized, simply delete it.
       if (MI == ReMatOrigDefMI && CanDelete) {
         DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
-                     << MI << '\n');
+                     << *MI << '\n');
         RemoveMachineInstrFromMaps(MI);
         vrm.RemoveMachineInstrFromMaps(MI);
         MI->eraseFromParent();
@@ -1292,9 +1296,25 @@
     MachineOperand &O = ri.getOperand();
     ++ri;
     if (MI->isDebugValue()) {
-      // Remove debug info for now.
-      O.setReg(0U);
+      // Modify DBG_VALUE now that the value is in a spill slot.
+      if (Slot == VirtRegMap::NO_STACK_SLOT) {
+        uint64_t Offset = MI->getOperand(1).getImm();
+        const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+        DebugLoc DL = MI->getDebugLoc();
+        if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, Slot,
+                                                           Offset, MDPtr, DL)) {
+          DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+          ReplaceMachineInstrInMaps(MI, NewDV);
+          MachineBasicBlock *MBB = MI->getParent();
+          MBB->insert(MBB->erase(MI), NewDV);
+          continue;
+        }
+      }
+
       DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+      RemoveMachineInstrFromMaps(MI);
+      vrm.RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
       continue;
     }
     assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
@@ -1517,6 +1537,12 @@
     MachineOperand &O = ri.getOperand();
     MachineInstr *MI = &*ri;
     ++ri;
+    if (MI->isDebugValue()) {
+      // Remove debug info for now.
+      O.setReg(0U);
+      DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+      continue;
+    }
     if (O.isDef()) {
       assert(MI->isImplicitDef() &&
              "Register def was not rewritten?");
@@ -2009,6 +2035,8 @@
          E = mri_->reg_end(); I != E; ++I) {
     MachineOperand &O = I.getOperand();
     MachineInstr *MI = O.getParent();
+    if (MI->isDebugValue())
+      continue;
     SlotIndex Index = getInstructionIndex(MI);
     if (pli.liveAt(Index))
       ++NumConflicts;
@@ -2049,7 +2077,7 @@
          E = mri_->reg_end(); I != E; ++I) {
     MachineOperand &O = I.getOperand();
     MachineInstr *MI = O.getParent();
-    if (SeenMIs.count(MI))
+    if (MI->isDebugValue() || SeenMIs.count(MI))
       continue;
     SeenMIs.insert(MI);
     SlotIndex Index = getInstructionIndex(MI);
@@ -2073,7 +2101,7 @@
               << "constraints:\n";
           MI->print(Msg, tm_);
         }
-        llvm_report_error(Msg.str());
+        report_fatal_error(Msg.str());
       }
       for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) {
         if (!hasInterval(*AS))
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index d2f3775..798b9b9 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -36,7 +36,7 @@
 
 void LiveStacks::releaseMemory() {
   // Release VNInfo memroy regions after all VNInfo objects are dtor'd.
-  VNInfoAllocator.Reset();
+  VNInfoAllocator.DestroyAll();
   S2IMap.clear();
   S2RCMap.clear();
 }
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 519990e..079684e 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -531,7 +531,7 @@
 
     // Mark live-in registers as live-in.
     SmallVector<unsigned, 4> Defs;
-    for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
+    for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
            EE = MBB->livein_end(); II != EE; ++II) {
       assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
              "Cannot have a live-in virtual register!");
@@ -556,17 +556,21 @@
       if (MI->isPHI())
         NumOperandsToProcess = 1;
 
+      // Clear kill and dead markers. LV will recompute them.
       SmallVector<unsigned, 4> UseRegs;
       SmallVector<unsigned, 4> DefRegs;
       for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
-        const MachineOperand &MO = MI->getOperand(i);
+        MachineOperand &MO = MI->getOperand(i);
         if (!MO.isReg() || MO.getReg() == 0)
           continue;
         unsigned MOReg = MO.getReg();
-        if (MO.isUse())
+        if (MO.isUse()) {
+          MO.setIsKill(false);
           UseRegs.push_back(MOReg);
-        if (MO.isDef())
+        } else /*MO.isDef()*/ {
+          MO.setIsDead(false);
           DefRegs.push_back(MOReg);
+        }
       }
 
       // Process all uses.
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 64134ce..eaaa1f8 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/raw_ostream.h"
@@ -41,13 +42,13 @@
 
 /// getSymbol - Return the MCSymbol for this basic block.
 ///
-MCSymbol *MachineBasicBlock::getSymbol(MCContext &Ctx) const {
-  SmallString<60> Name;
+MCSymbol *MachineBasicBlock::getSymbol() const {
   const MachineFunction *MF = getParent();
-  raw_svector_ostream(Name)
-    << MF->getTarget().getMCAsmInfo()->getPrivateGlobalPrefix() << "BB"
-    << MF->getFunctionNumber() << '_' << getNumber();
-  return Ctx.GetOrCreateSymbol(Name.str());
+  MCContext &Ctx = MF->getContext();
+  const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
+  return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+                               Twine(MF->getFunctionNumber()) + "_" +
+                               Twine(getNumber()));
 }
 
 
@@ -190,7 +191,7 @@
   const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();  
   if (!livein_empty()) {
     OS << "    Live Ins:";
-    for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+    for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
       OutputReg(OS, *I, TRI);
     OS << '\n';
   }
@@ -217,13 +218,14 @@
 }
 
 void MachineBasicBlock::removeLiveIn(unsigned Reg) {
-  livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
-  assert(I != livein_end() && "Not a live in!");
+  std::vector<unsigned>::iterator I =
+    std::find(LiveIns.begin(), LiveIns.end(), Reg);
+  assert(I != LiveIns.end() && "Not a live in!");
   LiveIns.erase(I);
 }
 
 bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
-  const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+  livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
   return I != livein_end();
 }
 
@@ -459,54 +461,41 @@
   //    conditional branch followed by an unconditional branch. DestA is the
   //    'true' destination and DestB is the 'false' destination.
 
-  bool MadeChange = false;
-  bool AddedFallThrough = false;
+  bool Changed = false;
 
   MachineFunction::iterator FallThru =
     llvm::next(MachineFunction::iterator(this));
-  
-  if (isCond) {
-    // If this block ends with a conditional branch that falls through to its
-    // successor, set DestB as the successor.
-    if (DestB == 0 && FallThru != getParent()->end()) {
+
+  if (DestA == 0 && DestB == 0) {
+    // Block falls through to successor.
+    DestA = FallThru;
+    DestB = FallThru;
+  } else if (DestA != 0 && DestB == 0) {
+    if (isCond)
+      // Block ends in conditional jump that falls through to successor.
       DestB = FallThru;
-      AddedFallThrough = true;
-    }
   } else {
-    // If this is an unconditional branch with no explicit dest, it must just be
-    // a fallthrough into DestA.
-    if (DestA == 0 && FallThru != getParent()->end()) {
-      DestA = FallThru;
-      AddedFallThrough = true;
-    }
+    assert(DestA && DestB && isCond &&
+           "CFG in a bad state. Cannot correct CFG edges");
   }
-  
+
+  // Remove superfluous edges. I.e., those which aren't destinations of this
+  // basic block, duplicate edges, or landing pads.
+  SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
   MachineBasicBlock::succ_iterator SI = succ_begin();
-  MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
   while (SI != succ_end()) {
     const MachineBasicBlock *MBB = *SI;
-    if (MBB == DestA) {
-      DestA = 0;
-      ++SI;
-    } else if (MBB == DestB) {
-      DestB = 0;
-      ++SI;
-    } else if (MBB->isLandingPad() && 
-               MBB != OrigDestA && MBB != OrigDestB) {
-      ++SI;
-    } else {
-      // Otherwise, this is a superfluous edge, remove it.
+    if (!SeenMBBs.insert(MBB) ||
+        (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+      // This is a superfluous edge, remove it.
       SI = removeSuccessor(SI);
-      MadeChange = true;
+      Changed = true;
+    } else {
+      ++SI;
     }
   }
 
-  if (!AddedFallThrough)
-    assert(DestA == 0 && DestB == 0 && "MachineCFG is missing edges!");
-  else if (isCond)
-    assert(DestA == 0 && "MachineCFG is missing edges!");
-
-  return MadeChange;
+  return Changed;
 }
 
 /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index b376e3d..f1c3fdc 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/ScopedHashTable.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
@@ -33,9 +34,9 @@
   class MachineCSE : public MachineFunctionPass {
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
-    MachineRegisterInfo  *MRI;
-    MachineDominatorTree *DT;
     AliasAnalysis *AA;
+    MachineDominatorTree *DT;
+    MachineRegisterInfo *MRI;
   public:
     static char ID; // Pass identification
     MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {}
@@ -51,9 +52,12 @@
     }
 
   private:
-    unsigned CurrVN;
+    typedef ScopedHashTableScope<MachineInstr*, unsigned,
+                                 MachineInstrExpressionTrait> ScopeType;
+    DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
     ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
     SmallVector<MachineInstr*, 64> Exps;
+    unsigned CurrVN;
 
     bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
     bool isPhysDefTriviallyDead(unsigned Reg,
@@ -61,7 +65,15 @@
                                 MachineBasicBlock::const_iterator E);
     bool hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB);
     bool isCSECandidate(MachineInstr *MI);
-    bool ProcessBlock(MachineDomTreeNode *Node);
+    bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
+                           MachineInstr *CSMI, MachineInstr *MI);
+    void EnterScope(MachineBasicBlock *MBB);
+    void ExitScope(MachineBasicBlock *MBB);
+    bool ProcessBlock(MachineBasicBlock *MBB);
+    void ExitScopeIfDone(MachineDomTreeNode *Node,
+                 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+                 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+    bool PerformCSE(MachineDomTreeNode *Node);
   };
 } // end anonymous namespace
 
@@ -92,7 +104,16 @@
     if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
         TargetRegisterInfo::isVirtualRegister(SrcReg) &&
         !SrcSubIdx && !DstSubIdx) {
+      const TargetRegisterClass *SRC   = MRI->getRegClass(SrcReg);
+      const TargetRegisterClass *RC    = MRI->getRegClass(Reg);
+      const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
+      if (!NewRC)
+        continue;
+      DEBUG(dbgs() << "Coalescing: " << *DefMI);
+      DEBUG(dbgs() << "*** to: " << *MI);
       MO.setReg(SrcReg);
+      if (NewRC != SRC)
+        MRI->setRegClass(SrcReg, NewRC);
       DefMI->eraseFromParent();
       ++NumCoalesces;
       Changed = true;
@@ -106,13 +127,15 @@
                                         MachineBasicBlock::const_iterator I,
                                         MachineBasicBlock::const_iterator E) {
   unsigned LookAheadLeft = 5;
-  while (LookAheadLeft--) {
+  while (LookAheadLeft) {
+    // Skip over dbg_value's.
+    while (I != E && I->isDebugValue())
+      ++I;
+
     if (I == E)
       // Reached end of block, register is obviously dead.
       return true;
 
-    if (I->isDebugValue())
-      continue;
     bool SeenDef = false;
     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
       const MachineOperand &MO = I->getOperand(i);
@@ -128,11 +151,15 @@
       // See a def of Reg (or an alias) before encountering any use, it's 
       // trivially dead.
       return true;
+
+    --LookAheadLeft;
     ++I;
   }
   return false;
 }
 
+/// hasLivePhysRegDefUse - Return true if the specified instruction read / write
+/// physical registers (except for dead defs of physical registers).
 bool MachineCSE::hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB){
   unsigned PhysDef = 0;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -167,12 +194,19 @@
   return false;
 }
 
-bool MachineCSE::isCSECandidate(MachineInstr *MI) {
-  // Ignore copies or instructions that read / write physical registers
-  // (except for dead defs of physical registers).
+static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
   unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
-  if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
-      MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg())
+  return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
+    MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg();
+}
+
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+  if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+      MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
+    return false;
+
+  // Ignore copies.
+  if (isCopy(MI, TII))
     return false;
 
   // Ignore stuff that we obviously can't move.
@@ -194,12 +228,83 @@
   return true;
 }
 
-bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
+/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
+/// common expression that defines Reg.
+bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+                                   MachineInstr *CSMI, MachineInstr *MI) {
+  // FIXME: Heuristics that works around the lack the live range splitting.
+
+  // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an
+  // immediate predecessor. We don't want to increase register pressure and end up
+  // causing other computation to be spilled.
+  if (MI->getDesc().isAsCheapAsAMove()) {
+    MachineBasicBlock *CSBB = CSMI->getParent();
+    MachineBasicBlock *BB = MI->getParent();
+    if (CSBB != BB && 
+        std::find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end())
+      return false;
+  }
+
+  // Heuristics #2: If the expression doesn't not use a vr and the only use
+  // of the redundant computation are copies, do not cse.
+  bool HasVRegUse = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isUse() && MO.getReg() &&
+        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+      HasVRegUse = true;
+      break;
+    }
+  }
+  if (!HasVRegUse) {
+    bool HasNonCopyUse = false;
+    for (MachineRegisterInfo::use_nodbg_iterator I =  MRI->use_nodbg_begin(Reg),
+           E = MRI->use_nodbg_end(); I != E; ++I) {
+      MachineInstr *Use = &*I;
+      // Ignore copies.
+      if (!isCopy(Use, TII)) {
+        HasNonCopyUse = true;
+        break;
+      }
+    }
+    if (!HasNonCopyUse)
+      return false;
+  }
+
+  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
+  // it unless the defined value is already used in the BB of the new use.
+  bool HasPHI = false;
+  SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
+  for (MachineRegisterInfo::use_nodbg_iterator I =  MRI->use_nodbg_begin(CSReg),
+       E = MRI->use_nodbg_end(); I != E; ++I) {
+    MachineInstr *Use = &*I;
+    HasPHI |= Use->isPHI();
+    CSBBs.insert(Use->getParent());
+  }
+
+  if (!HasPHI)
+    return true;
+  return CSBBs.count(MI->getParent());
+}
+
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+  ScopeType *Scope = new ScopeType(VNT);
+  ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+  assert(SI != ScopeMap.end());
+  ScopeMap.erase(SI);
+  delete SI->second;
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
   bool Changed = false;
 
-  ScopedHashTableScope<MachineInstr*, unsigned,
-    MachineInstrExpressionTrait> VNTS(VNT);
-  MachineBasicBlock *MBB = Node->getBlock();
+  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
     MachineInstr *MI = &*I;
     ++I;
@@ -210,8 +315,12 @@
     bool FoundCSE = VNT.count(MI);
     if (!FoundCSE) {
       // Look for trivial copy coalescing opportunities.
-      if (PerformTrivialCoalescing(MI, MBB))
+      if (PerformTrivialCoalescing(MI, MBB)) {
+        // After coalescing MI itself may become a copy.
+        if (isCopy(MI, TII))
+          continue;
         FoundCSE = VNT.count(MI);
+      }
     }
     // FIXME: commute commutable instructions?
 
@@ -231,6 +340,9 @@
     MachineInstr *CSMI = Exps[CSVN];
     DEBUG(dbgs() << "Examining: " << *MI);
     DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+
+    // Check if it's profitable to perform this CSE.
+    bool DoCSE = true;
     unsigned NumDefs = MI->getDesc().getNumDefs();
     for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
       MachineOperand &MO = MI->getOperand(i);
@@ -243,17 +355,85 @@
       assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
              TargetRegisterInfo::isVirtualRegister(NewReg) &&
              "Do not CSE physical register defs!");
-      MRI->replaceRegWith(OldReg, NewReg);
+      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+        DoCSE = false;
+        break;
+      }
+      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
       --NumDefs;
     }
-    MI->eraseFromParent();
-    ++NumCSEs;
+
+    // Actually perform the elimination.
+    if (DoCSE) {
+      for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i)
+        MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
+      MI->eraseFromParent();
+      ++NumCSEs;
+    } else {
+      DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+      VNT.insert(MI, CurrVN++);
+      Exps.push_back(MI);
+    }
+    CSEPairs.clear();
   }
 
-  // Recursively call ProcessBlock with childred.
-  const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
-  for (unsigned i = 0, e = Children.size(); i != e; ++i)
-    Changed |= ProcessBlock(Children[i]);
+  return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if its a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+                DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+  if (OpenChildren[Node])
+    return;
+
+  // Pop scope.
+  ExitScope(Node->getBlock());
+
+  // Now traverse upwards to pop ancestors whose offsprings are all done.
+  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+    unsigned Left = --OpenChildren[Parent];
+    if (Left != 0)
+      break;
+    ExitScope(Parent->getBlock());
+    Node = Parent;
+  }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+  SmallVector<MachineDomTreeNode*, 32> Scopes;
+  SmallVector<MachineDomTreeNode*, 8> WorkList;
+  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+  // Perform a DFS walk to determine the order of visit.
+  WorkList.push_back(Node);
+  do {
+    Node = WorkList.pop_back_val();
+    Scopes.push_back(Node);
+    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+    unsigned NumChildren = Children.size();
+    OpenChildren[Node] = NumChildren;
+    for (unsigned i = 0; i != NumChildren; ++i) {
+      MachineDomTreeNode *Child = Children[i];
+      ParentMap[Child] = Node;
+      WorkList.push_back(Child);
+    }
+  } while (!WorkList.empty());
+
+  // Now perform CSE.
+  bool Changed = false;
+  for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+    MachineDomTreeNode *Node = Scopes[i];
+    MachineBasicBlock *MBB = Node->getBlock();
+    EnterScope(MBB);
+    Changed |= ProcessBlock(MBB);
+    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+    ExitScopeIfDone(Node, OpenChildren, ParentMap);
+  }
 
   return Changed;
 }
@@ -262,7 +442,7 @@
   TII = MF.getTarget().getInstrInfo();
   TRI = MF.getTarget().getRegisterInfo();
   MRI = &MF.getRegInfo();
-  DT = &getAnalysis<MachineDominatorTree>();
   AA = &getAnalysis<AliasAnalysis>();
-  return ProcessBlock(DT->getRootNode());
+  DT = &getAnalysis<MachineDominatorTree>();
+  return PerformCSE(DT->getRootNode());
 }
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 4377d5b..3cf10b3 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -39,40 +40,6 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-namespace {
-  struct Printer : public MachineFunctionPass {
-    static char ID;
-
-    raw_ostream &OS;
-    const std::string Banner;
-
-    Printer(raw_ostream &os, const std::string &banner) 
-      : MachineFunctionPass(&ID), OS(os), Banner(banner) {}
-
-    const char *getPassName() const { return "MachineFunction Printer"; }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesAll();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-
-    bool runOnMachineFunction(MachineFunction &MF) {
-      OS << "# " << Banner << ":\n";
-      MF.print(OS);
-      return false;
-    }
-  };
-  char Printer::ID = 0;
-}
-
-/// Returns a newly-created MachineFunction Printer pass. The default banner is
-/// empty.
-///
-FunctionPass *llvm::createMachineFunctionPrinterPass(raw_ostream &OS,
-                                                     const std::string &Banner){
-  return new Printer(OS, Banner);
-}
-
 //===----------------------------------------------------------------------===//
 // MachineFunction implementation
 //===----------------------------------------------------------------------===//
@@ -84,22 +51,19 @@
   MBB->getParent()->DeleteMachineBasicBlock(MBB);
 }
 
-MachineFunction::MachineFunction(Function *F, const TargetMachine &TM,
-                                 unsigned FunctionNum)
-  : Fn(F), Target(TM) {
+MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+                                 unsigned FunctionNum, MachineModuleInfo &mmi)
+  : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) {
   if (TM.getRegisterInfo())
-    RegInfo = new (Allocator.Allocate<MachineRegisterInfo>())
-                  MachineRegisterInfo(*TM.getRegisterInfo());
+    RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
   else
     RegInfo = 0;
   MFInfo = 0;
-  FrameInfo = new (Allocator.Allocate<MachineFrameInfo>())
-                  MachineFrameInfo(*TM.getFrameInfo());
+  FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo());
   if (Fn->hasFnAttr(Attribute::StackAlignment))
     FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
         Fn->getAttributes().getFnAttributes()));
-  ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
-                     MachineConstantPool(TM.getTargetData());
+  ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
   Alignment = TM.getTargetLowering()->getFunctionAlignment(F);
   FunctionNumber = FunctionNum;
   JumpTableInfo = 0;
@@ -132,7 +96,7 @@
 getOrCreateJumpTableInfo(unsigned EntryKind) {
   if (JumpTableInfo) return JumpTableInfo;
   
-  JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
+  JumpTableInfo = new (Allocator)
     MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
   return JumpTableInfo;
 }
@@ -229,14 +193,13 @@
 MachineFunction::getMachineMemOperand(const Value *v, unsigned f,
                                       int64_t o, uint64_t s,
                                       unsigned base_alignment) {
-  return new (Allocator.Allocate<MachineMemOperand>())
-             MachineMemOperand(v, f, o, s, base_alignment);
+  return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment);
 }
 
 MachineMemOperand *
 MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
                                       int64_t Offset, uint64_t Size) {
-  return new (Allocator.Allocate<MachineMemOperand>())
+  return new (Allocator)
              MachineMemOperand(MMO->getValue(), MMO->getFlags(),
                                int64_t(uint64_t(MMO->getOffset()) +
                                        uint64_t(Offset)),
@@ -440,15 +403,6 @@
   return VReg;
 }
 
-/// getDILocation - Get the DILocation for a given DebugLoc object.
-DILocation MachineFunction::getDILocation(DebugLoc DL) const {
-  unsigned Idx = DL.getIndex();
-  assert(Idx < DebugLocInfo.DebugLocations.size() &&
-         "Invalid index into debug locations!");
-  return DILocation(DebugLocInfo.DebugLocations[Idx]);
-}
-
-
 /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
 /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
 /// normal 'L' label is returned.
@@ -572,6 +526,8 @@
   case MachineJumpTableInfo::EK_LabelDifference32:
   case MachineJumpTableInfo::EK_Custom32:
     return 4;
+  case MachineJumpTableInfo::EK_Inline:
+    return 0;
   }
   assert(0 && "Unknown jump table encoding!");
   return ~0;
@@ -589,22 +545,22 @@
   case MachineJumpTableInfo::EK_LabelDifference32:
   case MachineJumpTableInfo::EK_Custom32:
     return TD.getABIIntegerTypeAlignment(32);
+  case MachineJumpTableInfo::EK_Inline:
+    return 1;
   }
   assert(0 && "Unknown jump table encoding!");
   return ~0;
 }
 
-/// getJumpTableIndex - Create a new jump table entry in the jump table info
-/// or return an existing one.
+/// createJumpTableIndex - Create a new jump table entry in the jump table info.
 ///
-unsigned MachineJumpTableInfo::getJumpTableIndex(
+unsigned MachineJumpTableInfo::createJumpTableIndex(
                                const std::vector<MachineBasicBlock*> &DestBBs) {
   assert(!DestBBs.empty() && "Cannot create an empty jump table!");
   JumpTables.push_back(MachineJumpTableEntry(DestBBs));
   return JumpTables.size()-1;
 }
 
-
 /// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
 /// the jump tables to branch to New instead.
 bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
@@ -674,7 +630,7 @@
 
 /// CanShareConstantPoolEntry - Test whether the given two constants
 /// can be allocated the same constant pool entry.
-static bool CanShareConstantPoolEntry(Constant *A, Constant *B,
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
                                       const TargetData *TD) {
   // Handle the trivial case quickly.
   if (A == B) return true;
@@ -689,17 +645,17 @@
 
   // If a floating-point value and an integer value have the same encoding,
   // they can share a constant-pool entry.
-  if (ConstantFP *AFP = dyn_cast<ConstantFP>(A))
-    if (ConstantInt *BI = dyn_cast<ConstantInt>(B))
+  if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A))
+    if (const ConstantInt *BI = dyn_cast<ConstantInt>(B))
       return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
-  if (ConstantFP *BFP = dyn_cast<ConstantFP>(B))
-    if (ConstantInt *AI = dyn_cast<ConstantInt>(A))
+  if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B))
+    if (const ConstantInt *AI = dyn_cast<ConstantInt>(A))
       return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
 
   // Two vectors can share an entry if each pair of corresponding
   // elements could.
-  if (ConstantVector *AV = dyn_cast<ConstantVector>(A))
-    if (ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
+  if (const ConstantVector *AV = dyn_cast<ConstantVector>(A))
+    if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
       if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
         return false;
       for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
@@ -718,7 +674,7 @@
 /// an existing one.  User must specify the log2 of the minimum required
 /// alignment for the object.
 ///
-unsigned MachineConstantPool::getConstantPoolIndex(Constant *C, 
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, 
                                                    unsigned Alignment) {
   assert(Alignment && "Alignment must be specified!");
   if (Alignment > PoolAlignment) PoolAlignment = Alignment;
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 8d87e3e..07a0f45 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 using namespace llvm;
 
 // Register this pass with PassInfo directly to avoid having to define
@@ -34,9 +35,24 @@
   assert(!MF && "MachineFunctionAnalysis left initialized!");
 }
 
+void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<MachineModuleInfo>();
+}
+
+bool MachineFunctionAnalysis::doInitialization(Module &M) {
+  MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  assert(MMI && "MMI not around yet??");
+  MMI->setModule(&M);
+  NextFnNum = 0;
+  return false;
+}
+
+
 bool MachineFunctionAnalysis::runOnFunction(Function &F) {
   assert(!MF && "MachineFunctionAnalysis already initialized!");
-  MF = new MachineFunction(&F, TM, NextFnNum++);
+  MF = new MachineFunction(&F, TM, NextFnNum++,
+                           getAnalysis<MachineModuleInfo>());
   return false;
 }
 
@@ -44,7 +60,3 @@
   delete MF;
   MF = 0;
 }
-
-void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-}
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 2f8d4c9e..e5a4912 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -15,8 +15,14 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
 using namespace llvm;
 
+Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
+                                             const std::string &Banner) const {
+  return createMachineFunctionPrinterPass(O, Banner);
+}
+
 bool MachineFunctionPass::runOnFunction(Function &F) {
   // Do not codegen any 'available_externally' functions at all, they have
   // definitions outside the translation unit.
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
new file mode 100644
index 0000000..547c4fe
--- /dev/null
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -0,0 +1,60 @@
+//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineFunctionPrinterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+/// MachineFunctionPrinterPass - This is a pass to dump the IR of a
+/// MachineFunction.
+///
+struct MachineFunctionPrinterPass : public MachineFunctionPass {
+  static char ID;
+
+  raw_ostream &OS;
+  const std::string Banner;
+
+  MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) 
+      : MachineFunctionPass(&ID), OS(os), Banner(banner) {}
+
+  const char *getPassName() const { return "MachineFunction Printer"; }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) {
+    OS << "# " << Banner << ":\n";
+    MF.print(OS);
+    return false;
+  }
+};
+
+char MachineFunctionPrinterPass::ID = 0;
+}
+
+namespace llvm {
+/// Returns a newly-created MachineFunction Printer pass. The
+/// default banner is empty.
+///
+MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
+                                                      const std::string &Banner){
+  return new MachineFunctionPrinterPass(OS, Banner);
+}
+
+}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e23670d..9f855d9 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/InlineAsm.h"
+#include "llvm/Metadata.h"
 #include "llvm/Type.h"
 #include "llvm/Value.h"
 #include "llvm/Assembly/Writer.h"
@@ -23,6 +24,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetInstrDesc.h"
@@ -35,7 +37,6 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/Metadata.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -189,6 +190,10 @@
            getOffset() == Other.getOffset();
   case MachineOperand::MO_BlockAddress:
     return getBlockAddress() == Other.getBlockAddress();
+  case MachineOperand::MO_MCSymbol:
+    return getMCSymbol() == Other.getMCSymbol();
+  case MachineOperand::MO_Metadata:
+    return getMetadata() == Other.getMetadata();
   }
 }
 
@@ -291,6 +296,9 @@
     WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
     OS << '>';
     break;
+  case MachineOperand::MO_MCSymbol:
+    OS << "<MCSym=" << *getMCSymbol() << '>';
+    break;
   default:
     llvm_unreachable("Unrecognized operand type");
   }
@@ -389,7 +397,7 @@
 /// TID NULL and no operands.
 MachineInstr::MachineInstr()
   : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
-    Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
+    Parent(0) {
   // Make sure that we get added to a machine basicblock
   LeakDetector::addGarbageObject(this);
 }
@@ -403,20 +411,14 @@
       addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
 }
 
-/// MachineInstr ctor - This constructor create a MachineInstr and add the
-/// implicit operands. It reserves space for number of operands specified by
-/// TargetInstrDesc or the numOperands if it is not zero. (for
-/// instructions with variable number of operands).
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified by
+/// the TargetInstrDesc.
 MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
   : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
-    MemRefs(0), MemRefsEnd(0), Parent(0),
-    debugLoc(DebugLoc::getUnknownLoc()) {
-  if (!NoImp && TID->getImplicitDefs())
-    for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
-      NumImplicitOps++;
-  if (!NoImp && TID->getImplicitUses())
-    for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
-      NumImplicitOps++;
+    MemRefs(0), MemRefsEnd(0), Parent(0) {
+  if (!NoImp)
+    NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
   Operands.reserve(NumImplicitOps + TID->getNumOperands());
   if (!NoImp)
     addImplicitDefUseOperands();
@@ -429,12 +431,8 @@
                            bool NoImp)
   : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
     Parent(0), debugLoc(dl) {
-  if (!NoImp && TID->getImplicitDefs())
-    for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
-      NumImplicitOps++;
-  if (!NoImp && TID->getImplicitUses())
-    for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
-      NumImplicitOps++;
+  if (!NoImp)
+    NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
   Operands.reserve(NumImplicitOps + TID->getNumOperands());
   if (!NoImp)
     addImplicitDefUseOperands();
@@ -445,18 +443,11 @@
 /// MachineInstr ctor - Work exactly the same as the ctor two above, except
 /// that the MachineInstr is created and added to the end of the specified 
 /// basic block.
-///
 MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
   : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
-    MemRefs(0), MemRefsEnd(0), Parent(0), 
-    debugLoc(DebugLoc::getUnknownLoc()) {
+    MemRefs(0), MemRefsEnd(0), Parent(0) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
-  if (TID->ImplicitDefs)
-    for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
-      NumImplicitOps++;
-  if (TID->ImplicitUses)
-    for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
-      NumImplicitOps++;
+  NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
   Operands.reserve(NumImplicitOps + TID->getNumOperands());
   addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
@@ -471,12 +462,7 @@
   : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
     Parent(0), debugLoc(dl) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
-  if (TID->ImplicitDefs)
-    for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
-      NumImplicitOps++;
-  if (TID->ImplicitUses)
-    for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
-      NumImplicitOps++;
+  NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
   Operands.reserve(NumImplicitOps + TID->getNumOperands());
   addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
@@ -1119,6 +1105,19 @@
   return Reg;
 }
 
+/// allDefsAreDead - Return true if all the defs of this instruction are dead.
+///
+bool MachineInstr::allDefsAreDead() const {
+  for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || MO.isUse())
+      continue;
+    if (!MO.isDead())
+      return false;
+  }
+  return true;
+}
+
 void MachineInstr::dump() const {
   dbgs() << "  " << *this;
 }
@@ -1215,17 +1214,16 @@
 
     // TODO: print InlinedAtLoc information
 
-    DILocation DLT = MF->getDILocation(debugLoc);
-    DIScope Scope = DLT.getScope();
+    DIScope Scope(debugLoc.getScope(MF->getFunction()->getContext()));
     OS << " dbg:";
     // Omit the directory, since it's usually long and uninteresting.
-    if (!Scope.isNull())
+    if (Scope.Verify())
       OS << Scope.getFilename();
     else
       OS << "<unknown>";
-    OS << ':' << DLT.getLineNumber();
-    if (DLT.getColumnNumber() != 0)
-      OS << ':' << DLT.getColumnNumber();
+    OS << ':' << debugLoc.getLine();
+    if (debugLoc.getCol() != 0)
+      OS << ':' << debugLoc.getCol();
   }
 
   OS << "\n";
@@ -1363,30 +1361,33 @@
     const MachineOperand &MO = MI->getOperand(i);
     uint64_t Key = (uint64_t)MO.getType() << 32;
     switch (MO.getType()) {
-      default: break;
-      case MachineOperand::MO_Register:
-        if (MO.isDef() && MO.getReg() &&
-            TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-          continue;  // Skip virtual register defs.
-        Key |= MO.getReg();
-        break;
-      case MachineOperand::MO_Immediate:
-        Key |= MO.getImm();
-        break;
-      case MachineOperand::MO_FrameIndex:
-      case MachineOperand::MO_ConstantPoolIndex:
-      case MachineOperand::MO_JumpTableIndex:
-        Key |= MO.getIndex();
-        break;
-      case MachineOperand::MO_MachineBasicBlock:
-        Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
-        break;
-      case MachineOperand::MO_GlobalAddress:
-        Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
-        break;
-      case MachineOperand::MO_BlockAddress:
-        Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
-        break;
+    default: break;
+    case MachineOperand::MO_Register:
+      if (MO.isDef() && MO.getReg() &&
+          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        continue;  // Skip virtual register defs.
+      Key |= MO.getReg();
+      break;
+    case MachineOperand::MO_Immediate:
+      Key |= MO.getImm();
+      break;
+    case MachineOperand::MO_FrameIndex:
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_JumpTableIndex:
+      Key |= MO.getIndex();
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+      break;
+    case MachineOperand::MO_BlockAddress:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+      break;
+    case MachineOperand::MO_MCSymbol:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+      break;
     }
     Key += ~(Key << 32);
     Key ^= (Key >> 22);
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 0361694..b2e757d8d 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -22,8 +22,8 @@
 
 #define DEBUG_TYPE "machine-licm"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,6 +33,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -41,32 +42,41 @@
 
 STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
 STATISTIC(NumCSEed,   "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+          "Number of machine instructions hoisted out of loops post regalloc");
 
 namespace {
   class MachineLICM : public MachineFunctionPass {
-    MachineConstantPool *MCP;
+    bool PreRegAlloc;
+
     const TargetMachine   *TM;
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
-    BitVector AllocatableSet;
+    const MachineFrameInfo *MFI;
+    MachineRegisterInfo *RegInfo;
 
     // Various analyses that we use...
     AliasAnalysis        *AA;      // Alias analysis info.
-    MachineLoopInfo      *LI;      // Current MachineLoopInfo
+    MachineLoopInfo      *MLI;     // Current MachineLoopInfo
     MachineDominatorTree *DT;      // Machine dominator tree for the cur loop
-    MachineRegisterInfo  *RegInfo; // Machine register information
 
     // State that is updated as we process loops
     bool         Changed;          // True if a loop is changed.
-    bool         FirstInLoop;      // True if it's the first LICM in the loop.
     MachineLoop *CurLoop;          // The current loop we are working on.
     MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
 
+    BitVector AllocatableSet;
+
     // For each opcode, keep a list of potentail CSE instructions.
     DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
   public:
     static char ID; // Pass identification, replacement for typeid
-    MachineLICM() : MachineFunctionPass(&ID) {}
+    MachineLICM() :
+      MachineFunctionPass(&ID), PreRegAlloc(true) {}
+
+    explicit MachineLICM(bool PreRA) :
+      MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
@@ -88,6 +98,39 @@
     }
 
   private:
+    /// CandidateInfo - Keep track of information about hoisting candidates.
+    struct CandidateInfo {
+      MachineInstr *MI;
+      unsigned      Def;
+      int           FI;
+      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+        : MI(mi), Def(def), FI(fi) {}
+    };
+
+    /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+    /// invariants out to the preheader.
+    void HoistRegionPostRA();
+
+    /// HoistPostRA - When an instruction is found to only use loop invariant
+    /// operands that is safe to hoist, this instruction is called to do the
+    /// dirty work.
+    void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+    /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
+    /// gather register def and frame object update information.
+    void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+                   SmallSet<int, 32> &StoredFIs,
+                   SmallVector<CandidateInfo, 32> &Candidates);
+
+    /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+    /// current loop.
+    void AddToLiveIns(unsigned Reg);
+
+    /// IsLICMCandidate - Returns true if the instruction may be a suitable
+    /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
+    /// not safe to hoist it.
+    bool IsLICMCandidate(MachineInstr &I);
+
     /// IsLoopInvariantInst - Returns true if the instruction is loop
     /// invariant. I.e., all virtual register operands are defined outside of
     /// the loop, physical registers aren't accessed (explicitly or implicitly),
@@ -145,7 +188,9 @@
 static RegisterPass<MachineLICM>
 X("machinelicm", "Machine Loop Invariant Code Motion");
 
-FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); }
+FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
+  return new MachineLICM(PreRegAlloc);
+}
 
 /// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most
 /// loop that has a preheader.
@@ -156,31 +201,31 @@
   return true;
 }
 
-/// Hoist expressions out of the specified loop. Note, alias info for inner loop
-/// is not preserved so it is not a good idea to run LICM multiple times on one
-/// loop.
-///
 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(dbgs() << "******** Machine LICM ********\n");
+  if (PreRegAlloc)
+    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n");
+  else
+    DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
 
-  Changed = FirstInLoop = false;
-  MCP = MF.getConstantPool();
+  Changed = false;
   TM = &MF.getTarget();
   TII = TM->getInstrInfo();
   TRI = TM->getRegisterInfo();
+  MFI = MF.getFrameInfo();
   RegInfo = &MF.getRegInfo();
   AllocatableSet = TRI->getAllocatableSet(MF);
 
   // Get our Loop information...
-  LI = &getAnalysis<MachineLoopInfo>();
-  DT = &getAnalysis<MachineDominatorTree>();
-  AA = &getAnalysis<AliasAnalysis>();
+  MLI = &getAnalysis<MachineLoopInfo>();
+  DT  = &getAnalysis<MachineDominatorTree>();
+  AA  = &getAnalysis<AliasAnalysis>();
 
-  for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
+  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){
     CurLoop = *I;
 
-    // Only visit outer-most preheader-sporting loops.
-    if (!LoopIsOuterMostWithPreheader(CurLoop))
+    // If this is done before regalloc, only visit outer-most preheader-sporting
+    // loops.
+    if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop))
       continue;
 
     // Determine the block to which to hoist instructions. If we can't find a
@@ -193,16 +238,230 @@
     if (!CurPreheader)
       continue;
 
-    // CSEMap is initialized for loop header when the first instruction is
-    // being hoisted.
-    FirstInLoop = true;
-    HoistRegion(DT->getNode(CurLoop->getHeader()));
-    CSEMap.clear();
+    if (!PreRegAlloc)
+      HoistRegionPostRA();
+    else {
+      // CSEMap is initialized for loop header when the first instruction is
+      // being hoisted.
+      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+      HoistRegion(N);
+      CSEMap.clear();
+    }
   }
 
   return Changed;
 }
 
+/// InstructionStoresToFI - Return true if instruction stores to the
+/// specified frame.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+         oe = MI->memoperands_end(); o != oe; ++o) {
+    if (!(*o)->isStore() || !(*o)->getValue())
+      continue;
+    if (const FixedStackPseudoSourceValue *Value =
+        dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+      if (Value->getFrameIndex() == FI)
+        return true;
+    }
+  }
+  return false;
+}
+
+/// ProcessMI - Examine the instruction for potentai LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+                            unsigned *PhysRegDefs,
+                            SmallSet<int, 32> &StoredFIs,
+                            SmallVector<CandidateInfo, 32> &Candidates) {
+  bool RuledOut = false;
+  bool HasNonInvariantUse = false;
+  unsigned Def = 0;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isFI()) {
+      // Remember if the instruction stores to the frame index.
+      int FI = MO.getIndex();
+      if (!StoredFIs.count(FI) &&
+          MFI->isSpillSlotObjectIndex(FI) &&
+          InstructionStoresToFI(MI, FI))
+        StoredFIs.insert(FI);
+      HasNonInvariantUse = true;
+      continue;
+    }
+
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+           "Not expecting virtual register!");
+
+    if (!MO.isDef()) {
+      if (Reg && PhysRegDefs[Reg])
+        // If it's using a non-loop-invariant register, then it's obviously not
+        // safe to hoist.
+        HasNonInvariantUse = true;
+      continue;
+    }
+
+    if (MO.isImplicit()) {
+      ++PhysRegDefs[Reg];
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        ++PhysRegDefs[*AS];
+      if (!MO.isDead())
+        // Non-dead implicit def? This cannot be hoisted.
+        RuledOut = true;
+      // No need to check if a dead implicit def is also defined by
+      // another instruction.
+      continue;
+    }
+
+    // FIXME: For now, avoid instructions with multiple defs, unless
+    // it's a dead implicit def.
+    if (Def)
+      RuledOut = true;
+    else
+      Def = Reg;
+
+    // If we have already seen another instruction that defines the same
+    // register, then this is not safe.
+    if (++PhysRegDefs[Reg] > 1)
+      // MI defined register is seen defined by another instruction in
+      // the loop, it cannot be a LICM candidate.
+      RuledOut = true;
+    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+      if (++PhysRegDefs[*AS] > 1)
+        RuledOut = true;
+  }
+
+  // Only consider reloads for now and remats which do not have register
+  // operands. FIXME: Consider unfold load folding instructions.
+  if (Def && !RuledOut) {
+    int FI = INT_MIN;
+    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+      Candidates.push_back(CandidateInfo(MI, Def, FI));
+  }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+  unsigned NumRegs = TRI->getNumRegs();
+  unsigned *PhysRegDefs = new unsigned[NumRegs];
+  std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+
+  SmallVector<CandidateInfo, 32> Candidates;
+  SmallSet<int, 32> StoredFIs;
+
+  // Walk the entire region, count number of defs for each register, and
+  // collect potential LICM candidates.
+  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *BB = Blocks[i];
+    // Conservatively treat live-in's as an external def.
+    // FIXME: That means a reload that're reused in successor block(s) will not
+    // be LICM'ed.
+    for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+           E = BB->livein_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      ++PhysRegDefs[Reg];
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        ++PhysRegDefs[*AS];
+    }
+
+    for (MachineBasicBlock::iterator
+           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+      MachineInstr *MI = &*MII;
+      ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+    }
+  }
+
+  // Now evaluate whether the potential candidates qualify.
+  // 1. Check if the candidate defined register is defined by another
+  //    instruction in the loop.
+  // 2. If the candidate is a load from stack slot (always true for now),
+  //    check if the slot is stored anywhere in the loop.
+  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+    if (Candidates[i].FI != INT_MIN &&
+        StoredFIs.count(Candidates[i].FI))
+      continue;
+
+    if (PhysRegDefs[Candidates[i].Def] == 1) {
+      bool Safe = true;
+      MachineInstr *MI = Candidates[i].MI;
+      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+        const MachineOperand &MO = MI->getOperand(j);
+        if (!MO.isReg() || MO.isDef() || !MO.getReg())
+          continue;
+        if (PhysRegDefs[MO.getReg()]) {
+          // If it's using a non-loop-invariant register, then it's obviously
+          // not safe to hoist.
+          Safe = false;
+          break;
+        }
+      }
+      if (Safe)
+        HoistPostRA(MI, Candidates[i].Def);
+    }
+  }
+
+  delete[] PhysRegDefs;
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *BB = Blocks[i];
+    if (!BB->isLiveIn(Reg))
+      BB->addLiveIn(Reg);
+    for (MachineBasicBlock::iterator
+           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+      MachineInstr *MI = &*MII;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+        if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+          MO.setIsKill(false);
+      }
+    }
+  }
+}
+
+/// HoistPostRA - When an instruction is found to only use loop invariant
+/// operands that is safe to hoist, this instruction is called to do the
+/// dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+  // Now move the instructions to the predecessor, inserting it before any
+  // terminator instructions.
+  DEBUG({
+      dbgs() << "Hoisting " << *MI;
+      if (CurPreheader->getBasicBlock())
+        dbgs() << " to MachineBasicBlock "
+               << CurPreheader->getName();
+      if (MI->getParent()->getBasicBlock())
+        dbgs() << " from MachineBasicBlock "
+               << MI->getParent()->getName();
+      dbgs() << "\n";
+    });
+
+  // Splice the instruction to the preheader.
+  MachineBasicBlock *MBB = MI->getParent();
+  CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI);
+
+  // Add register to livein list to all the BBs in the current loop since a 
+  // loop invariant must be kept live throughout the whole loop. This is
+  // important to ensure later passes do not scavenge the def register.
+  AddToLiveIns(Def);
+
+  ++NumPostRAHoisted;
+  Changed = true;
+}
+
 /// HoistRegion - Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth
 /// first order w.r.t the DominatorTree. This allows us to visit definitions
@@ -223,17 +482,17 @@
   }
 
   const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
-
   for (unsigned I = 0, E = Children.size(); I != E; ++I)
     HoistRegion(Children[I]);
 }
 
-/// IsLoopInvariantInst - Returns true if the instruction is loop
-/// invariant. I.e., all virtual register operands are defined outside of the
-/// loop, physical registers aren't accessed explicitly, and there are no side
-/// effects that aren't captured by the operands or other flags.
-/// 
-bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+/// IsLICMCandidate - Returns true if the instruction may be a suitable
+/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
+/// not safe to hoist it.
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+  if (I.isImplicitDef())
+    return false;
+
   const TargetInstrDesc &TID = I.getDesc();
   
   // Ignore stuff that we obviously can't hoist.
@@ -251,6 +510,17 @@
       // This is a trivial form of alias analysis.
       return false;
   }
+  return true;
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+/// 
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+  if (!IsLICMCandidate(I))
+    return false;
 
   // The instruction is loop invariant if all of its operands are.
   for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
@@ -341,9 +611,6 @@
 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist
 /// the given loop invariant.
 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
-  if (MI.isImplicitDef())
-    return false;
-
   // FIXME: For now, only hoist re-materilizable instructions. LICM will
   // increase register pressure. We want to make sure it doesn't increase
   // spilling.
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 5052af7..25284d6 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -10,6 +10,9 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 
 #include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -17,11 +20,8 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
@@ -29,38 +29,268 @@
 
 // Handle the Pass registration stuff necessary to use TargetData's.
 static RegisterPass<MachineModuleInfo>
-X("machinemoduleinfo", "Module Information");
+X("machinemoduleinfo", "Machine Module Information");
 char MachineModuleInfo::ID = 0;
 
 // Out of line virtual method.
 MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
 
+namespace llvm {
+class MMIAddrLabelMapCallbackPtr : CallbackVH {
+  MMIAddrLabelMap *Map;
+public:
+  MMIAddrLabelMapCallbackPtr() : Map(0) {}
+  MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
+  
+  void setPtr(BasicBlock *BB) {
+    ValueHandleBase::operator=(BB);
+  }
+    
+  void setMap(MMIAddrLabelMap *map) { Map = map; }
+  
+  virtual void deleted();
+  virtual void allUsesReplacedWith(Value *V2);
+};
+  
+class MMIAddrLabelMap {
+  MCContext &Context;
+  struct AddrLabelSymEntry {
+    /// Symbols - The symbols for the label.  This is a pointer union that is
+    /// either one symbol (the common case) or a list of symbols.
+    PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
+    
+    Function *Fn;   // The containing function of the BasicBlock.
+    unsigned Index; // The index in BBCallbacks for the BasicBlock.
+  };
+  
+  DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+  
+  /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for.  We
+  /// use this so we get notified if a block is deleted or RAUWd.
+  std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
+
+  /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
+  /// whose corresponding BasicBlock got deleted.  These symbols need to be
+  /// emitted at some point in the file, so AsmPrinter emits them after the
+  /// function body.
+  DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+    DeletedAddrLabelsNeedingEmission;
+public:
+  
+  MMIAddrLabelMap(MCContext &context) : Context(context) {}
+  ~MMIAddrLabelMap() {
+    assert(DeletedAddrLabelsNeedingEmission.empty() &&
+           "Some labels for deleted blocks never got emitted");
+    
+    // Deallocate any of the 'list of symbols' case.
+    for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
+         I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
+      if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
+        delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
+  }
+  
+  MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
+  std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+  void takeDeletedSymbolsForFunction(Function *F, 
+                                     std::vector<MCSymbol*> &Result);
+
+  void UpdateForDeletedBlock(BasicBlock *BB);
+  void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+}
+
+MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
+  assert(BB->hasAddressTaken() &&
+         "Shouldn't get label for block without address taken");
+  AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+  
+  // If we already had an entry for this block, just return it.
+  if (!Entry.Symbols.isNull()) {
+    assert(BB->getParent() == Entry.Fn && "Parent changed");
+    if (Entry.Symbols.is<MCSymbol*>())
+      return Entry.Symbols.get<MCSymbol*>();
+    return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
+  }
+  
+  // Otherwise, this is a new entry, create a new symbol for it and add an
+  // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+  BBCallbacks.push_back(BB);
+  BBCallbacks.back().setMap(this);
+  Entry.Index = BBCallbacks.size()-1;
+  Entry.Fn = BB->getParent();
+  MCSymbol *Result = Context.CreateTempSymbol();
+  Entry.Symbols = Result;
+  return Result;
+}
+
+std::vector<MCSymbol*>
+MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+  assert(BB->hasAddressTaken() &&
+         "Shouldn't get label for block without address taken");
+  AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+  
+  std::vector<MCSymbol*> Result;
+  
+  // If we already had an entry for this block, just return it.
+  if (Entry.Symbols.isNull())
+    Result.push_back(getAddrLabelSymbol(BB));
+  else if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>())
+    Result.push_back(Sym);
+  else
+    Result = *Entry.Symbols.get<std::vector<MCSymbol*>*>();
+  return Result;
+}
+
+
+/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return
+/// them.
+void MMIAddrLabelMap::
+takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
+  DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
+    DeletedAddrLabelsNeedingEmission.find(F);
+
+  // If there are no entries for the function, just return.
+  if (I == DeletedAddrLabelsNeedingEmission.end()) return;
+  
+  // Otherwise, take the list.
+  std::swap(Result, I->second);
+  DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+
+void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+  // If the block got deleted, there is no need for the symbol.  If the symbol
+  // was already emitted, we can just forget about it, otherwise we need to
+  // queue it up for later emission when the function is output.
+  AddrLabelSymEntry Entry = AddrLabelSymbols[BB];
+  AddrLabelSymbols.erase(BB);
+  assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+  BBCallbacks[Entry.Index] = 0;  // Clear the callback.
+
+  assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) &&
+         "Block/parent mismatch");
+
+  // Handle both the single and the multiple symbols cases.
+  if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
+    if (Sym->isDefined())
+      return;
+  
+    // If the block is not yet defined, we need to emit it at the end of the
+    // function.  Add the symbol to the DeletedAddrLabelsNeedingEmission list
+    // for the containing Function.  Since the block is being deleted, its
+    // parent may already be removed, we have to get the function from 'Entry'.
+    DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+  } else {
+    std::vector<MCSymbol*> *Syms = Entry.Symbols.get<std::vector<MCSymbol*>*>();
+
+    for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
+      MCSymbol *Sym = (*Syms)[i];
+      if (Sym->isDefined()) continue;  // Ignore already emitted labels.
+      
+      // If the block is not yet defined, we need to emit it at the end of the
+      // function.  Add the symbol to the DeletedAddrLabelsNeedingEmission list
+      // for the containing Function.  Since the block is being deleted, its
+      // parent may already be removed, we have to get the function from
+      // 'Entry'.
+      DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+    }
+    
+    // The entry is deleted, free the memory associated with the symbol list.
+    delete Syms;
+  }
+}
+
+void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+  // Get the entry for the RAUW'd block and remove it from our map.
+  AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old];
+  AddrLabelSymbols.erase(Old);
+  assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+
+  AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+  // If New is not address taken, just move our symbol over to it.
+  if (NewEntry.Symbols.isNull()) {
+    BBCallbacks[OldEntry.Index].setPtr(New);    // Update the callback.
+    NewEntry = OldEntry;     // Set New's entry.
+    return;
+  }
+
+  BBCallbacks[OldEntry.Index] = 0;    // Update the callback.
+
+  // Otherwise, we need to add the old symbol to the new block's set.  If it is
+  // just a single entry, upgrade it to a symbol list.
+  if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) {
+    std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>();
+    SymList->push_back(PrevSym);
+    NewEntry.Symbols = SymList;
+  }
+      
+  std::vector<MCSymbol*> *SymList =
+    NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
+
+  // If the old entry was a single symbol, add it.
+  if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) {
+    SymList->push_back(Sym);
+    return;
+  }
+  
+  // Otherwise, concatenate the list.
+  std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
+  SymList->insert(SymList->end(), Syms->begin(), Syms->end());
+  delete Syms;
+}
+
+
+void MMIAddrLabelMapCallbackPtr::deleted() {
+  Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+  Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
+
 //===----------------------------------------------------------------------===//
 
-MachineModuleInfo::MachineModuleInfo()
-: ImmutablePass(&ID)
-, ObjFileMMI(0)
-, CurCallSite(0)
-, CallsEHReturn(0)
-, CallsUnwindInit(0)
-, DbgInfoAvailable(false) {
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
+: ImmutablePass(&ID), Context(MAI),
+  ObjFileMMI(0),
+  CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){
   // Always emit some info, by default "no personality" info.
   Personalities.push_back(NULL);
+  AddrLabelSymbols = 0;
+  TheModule = 0;
+}
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass(&ID), Context(*(MCAsmInfo*)0) {
+  assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
+         "should always be explicitly constructed by LLVMTargetMachine");
+  abort();
 }
 
 MachineModuleInfo::~MachineModuleInfo() {
   delete ObjFileMMI;
+  
+  // FIXME: Why isn't doFinalization being called??
+  //assert(AddrLabelSymbols == 0 && "doFinalization not called");
+  delete AddrLabelSymbols;
+  AddrLabelSymbols = 0;
 }
 
 /// doInitialization - Initialize the state for a new module.
 ///
 bool MachineModuleInfo::doInitialization() {
+  assert(AddrLabelSymbols == 0 && "Improperly initialized");
   return false;
 }
 
 /// doFinalization - Tear down the state after completion of a module.
 ///
 bool MachineModuleInfo::doFinalization() {
+  delete AddrLabelSymbols;
+  AddrLabelSymbols = 0;
   return false;
 }
 
@@ -83,23 +313,61 @@
 
 /// AnalyzeModule - Scan the module for global debug information.
 ///
-void MachineModuleInfo::AnalyzeModule(Module &M) {
+void MachineModuleInfo::AnalyzeModule(const Module &M) {
   // Insert functions in the llvm.used array (but not llvm.compiler.used) into
   // UsedFunctions.
-  GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+  const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
   if (!GV || !GV->hasInitializer()) return;
 
   // Should be an array of 'i8*'.
-  ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
   if (InitList == 0) return;
 
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
-    if (Function *F =
+    if (const Function *F =
           dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
       UsedFunctions.insert(F);
 }
 
-//===-EH-------------------------------------------------------------------===//
+//===- Address of Block Management ----------------------------------------===//
+
+
+/// getAddrLabelSymbol - Return the symbol to be used for the specified basic
+/// block when its address is taken.  This cannot be its normal LBB label
+/// because the block may be accessed outside its containing function.
+MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
+  // Lazily create AddrLabelSymbols.
+  if (AddrLabelSymbols == 0)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+  return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
+}
+
+/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+/// basic block when its address is taken.  If other blocks were RAUW'd to
+/// this one, we may have to emit them as well, return the whole set.
+std::vector<MCSymbol*> MachineModuleInfo::
+getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+  // Lazily create AddrLabelSymbols.
+  if (AddrLabelSymbols == 0)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
+}
+
+
+/// takeDeletedSymbolsForFunction - If the specified function has had any
+/// references to address-taken blocks generated, but the block got deleted,
+/// return the symbol now so we can emit it.  This prevents emitting a
+/// reference to a symbol that has no definition.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+                              std::vector<MCSymbol*> &Result) {
+  // If no blocks have had their addresses taken, we're done.
+  if (AddrLabelSymbols == 0) return;
+  return AddrLabelSymbols->
+     takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
 
 /// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
 /// specified MachineBasicBlock.
@@ -119,7 +387,7 @@
 /// addInvoke - Provide the begin and end labels of an invoke style call and
 /// associate it with a try landing pad block.
 void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
-                                  unsigned BeginLabel, unsigned EndLabel) {
+                                  MCSymbol *BeginLabel, MCSymbol *EndLabel) {
   LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
   LP.BeginLabels.push_back(BeginLabel);
   LP.EndLabels.push_back(EndLabel);
@@ -127,8 +395,8 @@
 
 /// addLandingPad - Provide the label of a try LandingPad block.
 ///
-unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
-  unsigned LandingPadLabel = NextLabelID();
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+  MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
   LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
   LP.LandingPadLabel = LandingPadLabel;
   return LandingPadLabel;
@@ -137,7 +405,7 @@
 /// addPersonality - Provide the personality function for the exception
 /// information.
 void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
-                                       Function *Personality) {
+                                       const Function *Personality) {
   LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
   LP.Personality = Personality;
 
@@ -156,7 +424,7 @@
 /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
 ///
 void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
-                                        std::vector<GlobalVariable *> &TyInfo) {
+                                  std::vector<const GlobalVariable *> &TyInfo) {
   LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
   for (unsigned N = TyInfo.size(); N; --N)
     LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
@@ -165,7 +433,7 @@
 /// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
 ///
 void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
-                                        std::vector<GlobalVariable *> &TyInfo) {
+                                  std::vector<const GlobalVariable *> &TyInfo) {
   LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
   std::vector<unsigned> IdsInFilter(TyInfo.size());
   for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
@@ -182,10 +450,13 @@
 
 /// TidyLandingPads - Remap landing pad labels and remove any deleted landing
 /// pads.
-void MachineModuleInfo::TidyLandingPads() {
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
   for (unsigned i = 0; i != LandingPads.size(); ) {
     LandingPadInfo &LandingPad = LandingPads[i];
-    LandingPad.LandingPadLabel = MappedLabel(LandingPad.LandingPadLabel);
+    if (LandingPad.LandingPadLabel &&
+        !LandingPad.LandingPadLabel->isDefined() &&
+        (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+      LandingPad.LandingPadLabel = 0;
 
     // Special case: we *should* emit LPs with null LP MBB. This indicates
     // "nounwind" case.
@@ -194,19 +465,17 @@
       continue;
     }
 
-    for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) {
-      unsigned BeginLabel = MappedLabel(LandingPad.BeginLabels[j]);
-      unsigned EndLabel = MappedLabel(LandingPad.EndLabels[j]);
-
-      if (!BeginLabel || !EndLabel) {
-        LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
-        LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
-        continue;
-      }
-
-      LandingPad.BeginLabels[j] = BeginLabel;
-      LandingPad.EndLabels[j] = EndLabel;
-      ++j;
+    for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+      MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+      MCSymbol *EndLabel = LandingPad.EndLabels[j];
+      if ((BeginLabel->isDefined() ||
+           (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+          (EndLabel->isDefined() ||
+           (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+      
+      LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+      LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+      --j, --e;
     }
 
     // Remove landing pads with no try-ranges.
@@ -220,14 +489,13 @@
     if (!LandingPad.LandingPadBlock ||
         (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
       LandingPad.TypeIds.clear();
-
     ++i;
   }
 }
 
 /// getTypeIDFor - Return the type id for the specified typeinfo.  This is
 /// function wide.
-unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) {
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
   for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
     if (TypeInfos[i] == TI) return i + 1;
 
@@ -267,7 +535,7 @@
 }
 
 /// getPersonality - Return the personality function for the current function.
-Function *MachineModuleInfo::getPersonality() const {
+const Function *MachineModuleInfo::getPersonality() const {
   // FIXME: Until PR1414 will be fixed, we're using 1 personality function per
   // function
   return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 39d2c75..5ab56c0 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -25,10 +25,9 @@
 void MachineModuleInfoELF::Anchor() {}
 
 static int SortSymbolPair(const void *LHS, const void *RHS) {
-  const MCSymbol *LHSS =
-    ((const std::pair<MCSymbol*, MCSymbol*>*)LHS)->first;
-  const MCSymbol *RHSS =
-    ((const std::pair<MCSymbol*, MCSymbol*>*)RHS)->first;
+  typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+  const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+  const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
   return LHSS->getName().compare(RHSS->getName());
 }
 
@@ -36,7 +35,7 @@
 /// sorted orer.
 MachineModuleInfoImpl::SymbolListTy
 MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
-                                                     MCSymbol*> &Map) {
+                                      MachineModuleInfoImpl::StubValueTy>&Map) {
   MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
 
   if (!List.empty())
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index d9ab677..724d81c 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -12,6 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
 MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
@@ -130,6 +133,157 @@
   return ++UI == use_nodbg_end();
 }
 
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->first == Reg || I->second == Reg)
+      return true;
+  return false;
+}
+
+bool MachineRegisterInfo::isLiveOut(unsigned Reg) const {
+  for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
+    if (*I == Reg)
+      return true;
+  return false;
+}
+
+static cl::opt<bool>
+SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
+                  cl::desc("Schedule copies of livein registers"),
+                  cl::init(false));
+
+/// EmitLiveInCopy - Emit a copy for a live in physical register. If the
+/// physical register has only a single copy use, then coalesced the copy
+/// if possible.
+static void EmitLiveInCopy(MachineBasicBlock *MBB,
+                           MachineBasicBlock::iterator &InsertPos,
+                           unsigned VirtReg, unsigned PhysReg,
+                           const TargetRegisterClass *RC,
+                           DenseMap<MachineInstr*, unsigned> &CopyRegMap,
+                           const MachineRegisterInfo &MRI,
+                           const TargetRegisterInfo &TRI,
+                           const TargetInstrInfo &TII) {
+  unsigned NumUses = 0;
+  MachineInstr *UseMI = NULL;
+  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
+         UE = MRI.use_end(); UI != UE; ++UI) {
+    UseMI = &*UI;
+    if (++NumUses > 1)
+      break;
+  }
+
+  // If the number of uses is not one, or the use is not a move instruction,
+  // don't coalesce. Also, only coalesce away a virtual register to virtual
+  // register copy.
+  bool Coalesced = false;
+  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+  if (NumUses == 1 &&
+      TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+      TargetRegisterInfo::isVirtualRegister(DstReg)) {
+    VirtReg = DstReg;
+    Coalesced = true;
+  }
+
+  // Now find an ideal location to insert the copy.
+  MachineBasicBlock::iterator Pos = InsertPos;
+  while (Pos != MBB->begin()) {
+    MachineInstr *PrevMI = prior(Pos);
+    DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
+    // copyRegToReg might emit multiple instructions to do a copy.
+    unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
+    if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
+      // This is what the BB looks like right now:
+      // r1024 = mov r0
+      // ...
+      // r1    = mov r1024
+      //
+      // We want to insert "r1025 = mov r1". Inserting this copy below the
+      // move to r1024 makes it impossible for that move to be coalesced.
+      //
+      // r1025 = mov r1
+      // r1024 = mov r0
+      // ...
+      // r1    = mov 1024
+      // r2    = mov 1025
+      break; // Woot! Found a good location.
+    --Pos;
+  }
+
+  bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
+  assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+  (void) Emitted;
+
+  CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
+  if (Coalesced) {
+    if (&*InsertPos == UseMI) ++InsertPos;
+    MBB->erase(UseMI);
+  }
+}
+
+/// InsertLiveInDbgValue - Insert a DBG_VALUE instruction for each live-in
+/// register that has a corresponding source information metadata. e.g.
+/// function parameters.
+static void InsertLiveInDbgValue(MachineBasicBlock *MBB,
+                                 MachineBasicBlock::iterator InsertPos,
+                                 unsigned LiveInReg, unsigned VirtReg,
+                                 const MachineRegisterInfo &MRI,
+                                 const TargetInstrInfo &TII) {
+  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
+         UE = MRI.use_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    if (!UseMI->isDebugValue() || UseMI->getParent() != MBB)
+      continue;
+    // Found local dbg_value. FIXME: Verify it's not possible to have multiple
+    // dbg_value's which reference the vr in the same mbb.
+    uint64_t Offset = UseMI->getOperand(1).getImm();
+    const MDNode *MDPtr = UseMI->getOperand(2).getMetadata();    
+    BuildMI(*MBB, InsertPos, InsertPos->getDebugLoc(),
+            TII.get(TargetOpcode::DBG_VALUE))
+      .addReg(LiveInReg).addImm(Offset).addMetadata(MDPtr);
+    return;
+  }
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+                                      const TargetRegisterInfo &TRI,
+                                      const TargetInstrInfo &TII) {
+  if (SchedLiveInCopies) {
+    // Emit the copies at a heuristically-determined location in the block.
+    DenseMap<MachineInstr*, unsigned> CopyRegMap;
+    MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
+    for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
+           E = livein_end(); LI != E; ++LI)
+      if (LI->second) {
+        const TargetRegisterClass *RC = getRegClass(LI->second);
+        EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
+                       RC, CopyRegMap, *this, TRI, TII);
+        InsertLiveInDbgValue(EntryMBB, InsertPos,
+                             LI->first, LI->second, *this, TII);
+      }
+  } else {
+    // Emit the copies into the top of the block.
+    for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
+           E = livein_end(); LI != E; ++LI)
+      if (LI->second) {
+        const TargetRegisterClass *RC = getRegClass(LI->second);
+        bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
+                                        LI->second, LI->first, RC, RC);
+        assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+        (void) Emitted;
+        InsertLiveInDbgValue(EntryMBB, EntryMBB->begin(),
+                             LI->first, LI->second, *this, TII);
+      }
+  }
+
+  // Add function live-ins to entry block live-in set.
+  for (MachineRegisterInfo::livein_iterator I = livein_begin(),
+       E = livein_end(); I != E; ++I)
+    EntryMBB->addLiveIn(I->first);
+}
+
 #ifndef NDEBUG
 void MachineRegisterInfo::dumpUses(unsigned Reg) const {
   for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 2255dc3..b8996d4 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -21,34 +21,50 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
-typedef std::vector<std::pair<MachineBasicBlock*, unsigned> >
-                IncomingPredInfoTy;
+/// BBInfo - Per-basic block information used internally by MachineSSAUpdater.
+class MachineSSAUpdater::BBInfo {
+public:
+  MachineBasicBlock *BB; // Back-pointer to the corresponding block.
+  unsigned AvailableVal; // Value to use in this block.
+  BBInfo *DefBB;         // Block that defines the available value.
+  int BlkNum;            // Postorder number.
+  BBInfo *IDom;          // Immediate dominator.
+  unsigned NumPreds;     // Number of predecessor blocks.
+  BBInfo **Preds;        // Array[NumPreds] of predecessor blocks.
+  MachineInstr *PHITag;  // Marker for existing PHIs that match.
 
+  BBInfo(MachineBasicBlock *ThisBB, unsigned V)
+    : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0),
+      NumPreds(0), Preds(0), PHITag(0) { }
+};
+
+typedef DenseMap<MachineBasicBlock*, MachineSSAUpdater::BBInfo*> BBMapTy;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
 static AvailableValsTy &getAvailableVals(void *AV) {
   return *static_cast<AvailableValsTy*>(AV);
 }
 
-static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
-  return *static_cast<IncomingPredInfoTy*>(IPI);
+static BBMapTy *getBBMap(void *BM) {
+  return static_cast<BBMapTy*>(BM);
 }
 
-
 MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
                                      SmallVectorImpl<MachineInstr*> *NewPHI)
-  : AV(0), IPI(0), InsertedPHIs(NewPHI) {
+  : AV(0), BM(0), InsertedPHIs(NewPHI) {
   TII = MF.getTarget().getInstrInfo();
   MRI = &MF.getRegInfo();
 }
 
 MachineSSAUpdater::~MachineSSAUpdater() {
   delete &getAvailableVals(AV);
-  delete &getIncomingPredInfo(IPI);
 }
 
 /// Initialize - Reset this object to get ready for a new set of SSA
@@ -59,11 +75,6 @@
   else
     getAvailableVals(AV).clear();
 
-  if (IPI == 0)
-    IPI = new IncomingPredInfoTy();
-  else
-    getIncomingPredInfo(IPI).clear();
-
   VR = V;
   VRC = MRI->getRegClass(VR);
 }
@@ -125,9 +136,9 @@
                            const TargetRegisterClass *RC,
                            MachineRegisterInfo *MRI, const TargetInstrInfo *TII) {
   unsigned NewVR = MRI->createVirtualRegister(RC);
-  return BuildMI(*BB, I, DebugLoc::getUnknownLoc(), TII->get(Opcode), NewVR);
+  return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
 }
-                          
+
 /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
 /// is live in the middle of the specified block.
 ///
@@ -150,7 +161,7 @@
 unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
   // If there is no definition of the renamed variable in this block, just use
   // GetValueAtEndOfBlock to do our work.
-  if (!getAvailableVals(AV).count(BB))
+  if (!HasValueForBlock(BB))
     return GetValueAtEndOfBlockInternal(BB);
 
   // If there are no predecessors, just return undef.
@@ -254,141 +265,436 @@
 
 /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
 /// for the specified BB and if so, return it.  If not, construct SSA form by
-/// walking predecessors inserting PHI nodes as needed until we get to a block
-/// where the value is available.
-///
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
 unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
   AvailableValsTy &AvailableVals = getAvailableVals(AV);
+  if (unsigned V = AvailableVals[BB])
+    return V;
 
-  // Query AvailableVals by doing an insertion of null.
-  std::pair<AvailableValsTy::iterator, bool> InsertRes =
-    AvailableVals.insert(std::make_pair(BB, 0));
+  // Pool allocation used internally by GetValueAtEndOfBlock.
+  BumpPtrAllocator Allocator;
+  BBMapTy BBMapObj;
+  BM = &BBMapObj;
 
-  // Handle the case when the insertion fails because we have already seen BB.
-  if (!InsertRes.second) {
-    // If the insertion failed, there are two cases.  The first case is that the
-    // value is already available for the specified block.  If we get this, just
-    // return the value.
-    if (InsertRes.first->second != 0)
-      return InsertRes.first->second;
+  SmallVector<BBInfo*, 100> BlockList;
+  BuildBlockList(BB, &BlockList, &Allocator);
 
-    // Otherwise, if the value we find is null, then this is the value is not
-    // known but it is being computed elsewhere in our recursion.  This means
-    // that we have a cycle.  Handle this by inserting a PHI node and returning
-    // it.  When we get back to the first instance of the recursion we will fill
-    // in the PHI node.
-    MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
-    MachineInstr *NewPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
-                                        VRC, MRI,TII);
-    unsigned NewVR = NewPHI->getOperand(0).getReg();
-    InsertRes.first->second = NewVR;
-    return NewVR;
-  }
-
-  // If there are no predecessors, then we must have found an unreachable block
-  // just return 'undef'.  Since there are no predecessors, InsertRes must not
-  // be invalidated.
-  if (BB->pred_empty()) {
+  // Special case: bail out if BB is unreachable.
+  if (BlockList.size() == 0) {
+    BM = 0;
     // Insert an implicit_def to represent an undef value.
     MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
                                         BB, BB->getFirstTerminator(),
                                         VRC, MRI, TII);
-    return InsertRes.first->second = NewDef->getOperand(0).getReg();
+    unsigned V = NewDef->getOperand(0).getReg();
+    AvailableVals[BB] = V;
+    return V;
   }
 
-  // Okay, the value isn't in the map and we just inserted a null in the entry
-  // to indicate that we're processing the block.  Since we have no idea what
-  // value is in this block, we have to recurse through our predecessors.
-  //
-  // While we're walking our predecessors, we keep track of them in a vector,
-  // then insert a PHI node in the end if we actually need one.  We could use a
-  // smallvector here, but that would take a lot of stack space for every level
-  // of the recursion, just use IncomingPredInfo as an explicit stack.
-  IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
-  unsigned FirstPredInfoEntry = IncomingPredInfo.size();
+  FindDominators(&BlockList);
+  FindPHIPlacement(&BlockList);
+  FindAvailableVals(&BlockList);
 
-  // As we're walking the predecessors, keep track of whether they are all
-  // producing the same value.  If so, this value will capture it, if not, it
-  // will get reset to null.  We distinguish the no-predecessor case explicitly
-  // below.
-  unsigned SingularValue = 0;
-  bool isFirstPred = true;
+  BM = 0;
+  return BBMapObj[BB]->DefBB->AvailableVal;
+}
+
+/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
+/// vector, set Info->NumPreds, and allocate space in Info->Preds.
+static void FindPredecessorBlocks(MachineSSAUpdater::BBInfo *Info,
+                                  SmallVectorImpl<MachineBasicBlock*> *Preds,
+                                  BumpPtrAllocator *Allocator) {
+  MachineBasicBlock *BB = Info->BB;
   for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
-         E = BB->pred_end(); PI != E; ++PI) {
-    MachineBasicBlock *PredBB = *PI;
-    unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
-    IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+         E = BB->pred_end(); PI != E; ++PI)
+    Preds->push_back(*PI);
 
-    // Compute SingularValue.
-    if (isFirstPred) {
-      SingularValue = PredVal;
-      isFirstPred = false;
-    } else if (PredVal != SingularValue)
-      SingularValue = 0;
-  }
+  Info->NumPreds = Preds->size();
+  Info->Preds = static_cast<MachineSSAUpdater::BBInfo**>
+    (Allocator->Allocate(Info->NumPreds * sizeof(MachineSSAUpdater::BBInfo*),
+                         AlignOf<MachineSSAUpdater::BBInfo*>::Alignment));
+}
 
-  /// Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If
-  /// this block is involved in a loop, a no-entry PHI node will have been
-  /// inserted as InsertedVal.  Otherwise, we'll still have the null we inserted
-  /// above.
-  unsigned &InsertedVal = AvailableVals[BB];
+/// BuildBlockList - Starting from the specified basic block, traverse back
+/// through its predecessors until reaching blocks with known values.  Create
+/// BBInfo structures for the blocks and append them to the block list.
+void MachineSSAUpdater::BuildBlockList(MachineBasicBlock *BB,
+                                       BlockListTy *BlockList,
+                                       BumpPtrAllocator *Allocator) {
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+  BBMapTy *BBMap = getBBMap(BM);
+  SmallVector<BBInfo*, 10> RootList;
+  SmallVector<BBInfo*, 64> WorkList;
 
-  // If all the predecessor values are the same then we don't need to insert a
-  // PHI.  This is the simple and common case.
-  if (SingularValue) {
-    // If a PHI node got inserted, replace it with the singlar value and delete
-    // it.
-    if (InsertedVal) {
-      MachineInstr *OldVal = MRI->getVRegDef(InsertedVal);
-      // Be careful about dead loops.  These RAUW's also update InsertedVal.
-      assert(InsertedVal != SingularValue && "Dead loop?");
-      ReplaceRegWith(InsertedVal, SingularValue);
-      OldVal->eraseFromParent();
+  BBInfo *Info = new (*Allocator) BBInfo(BB, 0);
+  (*BBMap)[BB] = Info;
+  WorkList.push_back(Info);
+
+  // Search backward from BB, creating BBInfos along the way and stopping when
+  // reaching blocks that define the value.  Record those defining blocks on
+  // the RootList.
+  SmallVector<MachineBasicBlock*, 10> Preds;
+  while (!WorkList.empty()) {
+    Info = WorkList.pop_back_val();
+    Preds.clear();
+    FindPredecessorBlocks(Info, &Preds, Allocator);
+
+    // Treat an unreachable predecessor as a definition with 'undef'.
+    if (Info->NumPreds == 0) {
+      // Insert an implicit_def to represent an undef value.
+      MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+                                          Info->BB,
+                                          Info->BB->getFirstTerminator(),
+                                          VRC, MRI, TII);
+      Info->AvailableVal = NewDef->getOperand(0).getReg();
+      Info->DefBB = Info;
+      RootList.push_back(Info);
+      continue;
     }
 
-    InsertedVal = SingularValue;
+    for (unsigned p = 0; p != Info->NumPreds; ++p) {
+      MachineBasicBlock *Pred = Preds[p];
+      // Check if BBMap already has a BBInfo for the predecessor block.
+      BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred);
+      if (BBMapBucket.second) {
+        Info->Preds[p] = BBMapBucket.second;
+        continue;
+      }
 
-    // Drop the entries we added in IncomingPredInfo to restore the stack.
-    IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
-                           IncomingPredInfo.end());
-    return InsertedVal;
+      // Create a new BBInfo for the predecessor.
+      unsigned PredVal = AvailableVals.lookup(Pred);
+      BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal);
+      BBMapBucket.second = PredInfo;
+      Info->Preds[p] = PredInfo;
+
+      if (PredInfo->AvailableVal) {
+        RootList.push_back(PredInfo);
+        continue;
+      }
+      WorkList.push_back(PredInfo);
+    }
   }
 
+  // Now that we know what blocks are backwards-reachable from the starting
+  // block, do a forward depth-first traversal to assign postorder numbers
+  // to those blocks.
+  BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0);
+  unsigned BlkNum = 1;
 
-  // Otherwise, we do need a PHI: insert one now if we don't already have one.
-  MachineInstr *InsertedPHI;
-  if (InsertedVal == 0) {
-    MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
-    InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
-                               VRC, MRI, TII);
-    InsertedVal = InsertedPHI->getOperand(0).getReg();
-  } else {
-    InsertedPHI = MRI->getVRegDef(InsertedVal);
+  // Initialize the worklist with the roots from the backward traversal.
+  while (!RootList.empty()) {
+    Info = RootList.pop_back_val();
+    Info->IDom = PseudoEntry;
+    Info->BlkNum = -1;
+    WorkList.push_back(Info);
   }
 
-  // Fill in all the predecessors of the PHI.
-  MachineInstrBuilder MIB(InsertedPHI);
-  for (IncomingPredInfoTy::iterator I =
-         IncomingPredInfo.begin()+FirstPredInfoEntry,
-         E = IncomingPredInfo.end(); I != E; ++I)
-    MIB.addReg(I->second).addMBB(I->first);
+  while (!WorkList.empty()) {
+    Info = WorkList.back();
 
-  // Drop the entries we added in IncomingPredInfo to restore the stack.
-  IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
-                         IncomingPredInfo.end());
+    if (Info->BlkNum == -2) {
+      // All the successors have been handled; assign the postorder number.
+      Info->BlkNum = BlkNum++;
+      // If not a root, put it on the BlockList.
+      if (!Info->AvailableVal)
+        BlockList->push_back(Info);
+      WorkList.pop_back();
+      continue;
+    }
 
-  // See if the PHI node can be merged to a single value.  This can happen in
-  // loop cases when we get a PHI of itself and one other value.
-  if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
-    MRI->replaceRegWith(InsertedVal, ConstVal);
-    InsertedPHI->eraseFromParent();
-    InsertedVal = ConstVal;
-  } else {
+    // Leave this entry on the worklist, but set its BlkNum to mark that its
+    // successors have been put on the worklist.  When it returns to the top
+    // the list, after handling its successors, it will be assigned a number.
+    Info->BlkNum = -2;
+
+    // Add unvisited successors to the work list.
+    for (MachineBasicBlock::succ_iterator SI = Info->BB->succ_begin(),
+           E = Info->BB->succ_end(); SI != E; ++SI) {
+      BBInfo *SuccInfo = (*BBMap)[*SI];
+      if (!SuccInfo || SuccInfo->BlkNum)
+        continue;
+      SuccInfo->BlkNum = -1;
+      WorkList.push_back(SuccInfo);
+    }
+  }
+  PseudoEntry->BlkNum = BlkNum;
+}
+
+/// IntersectDominators - This is the dataflow lattice "meet" operation for
+/// finding dominators.  Given two basic blocks, it walks up the dominator
+/// tree until it finds a common dominator of both.  It uses the postorder
+/// number of the blocks to determine how to do that.
+static MachineSSAUpdater::BBInfo *
+IntersectDominators(MachineSSAUpdater::BBInfo *Blk1,
+                    MachineSSAUpdater::BBInfo *Blk2) {
+  while (Blk1 != Blk2) {
+    while (Blk1->BlkNum < Blk2->BlkNum) {
+      Blk1 = Blk1->IDom;
+      if (!Blk1)
+        return Blk2;
+    }
+    while (Blk2->BlkNum < Blk1->BlkNum) {
+      Blk2 = Blk2->IDom;
+      if (!Blk2)
+        return Blk1;
+    }
+  }
+  return Blk1;
+}
+
+/// FindDominators - Calculate the dominator tree for the subset of the CFG
+/// corresponding to the basic blocks on the BlockList.  This uses the
+/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and
+/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10.
+/// Because the CFG subset does not include any edges leading into blocks that
+/// define the value, the results are not the usual dominator tree.  The CFG
+/// subset has a single pseudo-entry node with edges to a set of root nodes
+/// for blocks that define the value.  The dominators for this subset CFG are
+/// not the standard dominators but they are adequate for placing PHIs within
+/// the subset CFG.
+void MachineSSAUpdater::FindDominators(BlockListTy *BlockList) {
+  bool Changed;
+  do {
+    Changed = false;
+    // Iterate over the list in reverse order, i.e., forward on CFG edges.
+    for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
+           E = BlockList->rend(); I != E; ++I) {
+      BBInfo *Info = *I;
+
+      // Start with the first predecessor.
+      assert(Info->NumPreds > 0 && "unreachable block");
+      BBInfo *NewIDom = Info->Preds[0];
+
+      // Iterate through the block's other predecessors.
+      for (unsigned p = 1; p != Info->NumPreds; ++p) {
+        BBInfo *Pred = Info->Preds[p];
+        NewIDom = IntersectDominators(NewIDom, Pred);
+      }
+
+      // Check if the IDom value has changed.
+      if (NewIDom != Info->IDom) {
+        Info->IDom = NewIDom;
+        Changed = true;
+      }
+    }
+  } while (Changed);
+}
+
+/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for
+/// any blocks containing definitions of the value.  If one is found, then the
+/// successor of Pred is in the dominance frontier for the definition, and
+/// this function returns true.
+static bool IsDefInDomFrontier(const MachineSSAUpdater::BBInfo *Pred,
+                               const MachineSSAUpdater::BBInfo *IDom) {
+  for (; Pred != IDom; Pred = Pred->IDom) {
+    if (Pred->DefBB == Pred)
+      return true;
+  }
+  return false;
+}
+
+/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of
+/// the known definitions.  Iteratively add PHIs in the dom frontiers until
+/// nothing changes.  Along the way, keep track of the nearest dominating
+/// definitions for non-PHI blocks.
+void MachineSSAUpdater::FindPHIPlacement(BlockListTy *BlockList) {
+  bool Changed;
+  do {
+    Changed = false;
+    // Iterate over the list in reverse order, i.e., forward on CFG edges.
+    for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
+           E = BlockList->rend(); I != E; ++I) {
+      BBInfo *Info = *I;
+
+      // If this block already needs a PHI, there is nothing to do here.
+      if (Info->DefBB == Info)
+        continue;
+
+      // Default to use the same def as the immediate dominator.
+      BBInfo *NewDefBB = Info->IDom->DefBB;
+      for (unsigned p = 0; p != Info->NumPreds; ++p) {
+        if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) {
+          // Need a PHI here.
+          NewDefBB = Info;
+          break;
+        }
+      }
+
+      // Check if anything changed.
+      if (NewDefBB != Info->DefBB) {
+        Info->DefBB = NewDefBB;
+        Changed = true;
+      }
+    }
+  } while (Changed);
+}
+
+/// FindAvailableVal - If this block requires a PHI, first check if an existing
+/// PHI matches the PHI placement and reaching definitions computed earlier,
+/// and if not, create a new PHI.  Visit all the block's predecessors to
+/// calculate the available value for each one and fill in the incoming values
+/// for a new PHI.
+void MachineSSAUpdater::FindAvailableVals(BlockListTy *BlockList) {
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+
+  // Go through the worklist in forward order (i.e., backward through the CFG)
+  // and check if existing PHIs can be used.  If not, create empty PHIs where
+  // they are needed.
+  for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
+       I != E; ++I) {
+    BBInfo *Info = *I;
+    // Check if there needs to be a PHI in BB.
+    if (Info->DefBB != Info)
+      continue;
+
+    // Look for an existing PHI.
+    FindExistingPHI(Info->BB, BlockList);
+    if (Info->AvailableVal)
+      continue;
+
+    MachineBasicBlock::iterator Loc =
+      Info->BB->empty() ? Info->BB->end() : Info->BB->front();
+    MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, Info->BB, Loc,
+                                             VRC, MRI, TII);
+    unsigned PHI = InsertedPHI->getOperand(0).getReg();
+    Info->AvailableVal = PHI;
+    AvailableVals[Info->BB] = PHI;
+  }
+
+  // Now go back through the worklist in reverse order to fill in the arguments
+  // for any new PHIs added in the forward traversal.
+  for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
+         E = BlockList->rend(); I != E; ++I) {
+    BBInfo *Info = *I;
+
+    if (Info->DefBB != Info) {
+      // Record the available value at join nodes to speed up subsequent
+      // uses of this SSAUpdater for the same value.
+      if (Info->NumPreds > 1)
+        AvailableVals[Info->BB] = Info->DefBB->AvailableVal;
+      continue;
+    }
+
+    // Check if this block contains a newly added PHI.
+    unsigned PHI = Info->AvailableVal;
+    MachineInstr *InsertedPHI = MRI->getVRegDef(PHI);
+    if (!InsertedPHI->isPHI() || InsertedPHI->getNumOperands() > 1)
+      continue;
+
+    // Iterate through the block's predecessors.
+    MachineInstrBuilder MIB(InsertedPHI);
+    for (unsigned p = 0; p != Info->NumPreds; ++p) {
+      BBInfo *PredInfo = Info->Preds[p];
+      MachineBasicBlock *Pred = PredInfo->BB;
+      // Skip to the nearest preceding definition.
+      if (PredInfo->DefBB != PredInfo)
+        PredInfo = PredInfo->DefBB;
+      MIB.addReg(PredInfo->AvailableVal).addMBB(Pred);
+    }
+
     DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n");
 
     // If the client wants to know about all new instructions, tell it.
     if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
   }
+}
 
-  return InsertedVal;
+/// FindExistingPHI - Look through the PHI nodes in a block to see if any of
+/// them match what is needed.
+void MachineSSAUpdater::FindExistingPHI(MachineBasicBlock *BB,
+                                        BlockListTy *BlockList) {
+  for (MachineBasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
+       BBI != BBE && BBI->isPHI(); ++BBI) {
+    if (CheckIfPHIMatches(BBI)) {
+      RecordMatchingPHI(BBI);
+      break;
+    }
+    // Match failed: clear all the PHITag values.
+    for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
+         I != E; ++I)
+      (*I)->PHITag = 0;
+  }
+}
+
+/// CheckIfPHIMatches - Check if a PHI node matches the placement and values
+/// in the BBMap.
+bool MachineSSAUpdater::CheckIfPHIMatches(MachineInstr *PHI) {
+  BBMapTy *BBMap = getBBMap(BM);
+  SmallVector<MachineInstr*, 20> WorkList;
+  WorkList.push_back(PHI);
+
+  // Mark that the block containing this PHI has been visited.
+  (*BBMap)[PHI->getParent()]->PHITag = PHI;
+
+  while (!WorkList.empty()) {
+    PHI = WorkList.pop_back_val();
+
+    // Iterate through the PHI's incoming values.
+    for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) {
+      unsigned IncomingVal = PHI->getOperand(i).getReg();
+      BBInfo *PredInfo = (*BBMap)[PHI->getOperand(i+1).getMBB()];
+      // Skip to the nearest preceding definition.
+      if (PredInfo->DefBB != PredInfo)
+        PredInfo = PredInfo->DefBB;
+
+      // Check if it matches the expected value.
+      if (PredInfo->AvailableVal) {
+        if (IncomingVal == PredInfo->AvailableVal)
+          continue;
+        return false;
+      }
+
+      // Check if the value is a PHI in the correct block.
+      MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal);
+      if (!IncomingPHIVal->isPHI() ||
+          IncomingPHIVal->getParent() != PredInfo->BB)
+        return false;
+
+      // If this block has already been visited, check if this PHI matches.
+      if (PredInfo->PHITag) {
+        if (IncomingPHIVal == PredInfo->PHITag)
+          continue;
+        return false;
+      }
+      PredInfo->PHITag = IncomingPHIVal;
+
+      WorkList.push_back(IncomingPHIVal);
+    }
+  }
+  return true;
+}
+
+/// RecordMatchingPHI - For a PHI node that matches, record it and its input
+/// PHIs in both the BBMap and the AvailableVals mapping.
+void MachineSSAUpdater::RecordMatchingPHI(MachineInstr *PHI) {
+  BBMapTy *BBMap = getBBMap(BM);
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+  SmallVector<MachineInstr*, 20> WorkList;
+  WorkList.push_back(PHI);
+
+  // Record this PHI.
+  MachineBasicBlock *BB = PHI->getParent();
+  AvailableVals[BB] = PHI->getOperand(0).getReg();
+  (*BBMap)[BB]->AvailableVal = PHI->getOperand(0).getReg();
+
+  while (!WorkList.empty()) {
+    PHI = WorkList.pop_back_val();
+
+    // Iterate through the PHI's incoming values.
+    for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) {
+      unsigned IncomingVal = PHI->getOperand(i).getReg();
+      MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal);
+      if (!IncomingPHIVal->isPHI()) continue;
+      BB = IncomingPHIVal->getParent();
+      BBInfo *Info = (*BBMap)[BB];
+      if (!Info || Info->AvailableVal)
+        continue;
+
+      // Record the PHI and add it to the worklist.
+      AvailableVals[BB] = IncomingVal;
+      Info->AvailableVal = IncomingVal;
+      WorkList.push_back(IncomingPHIVal);
+    }
+  }
 }
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index e47ba7c..ef489dc 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -37,6 +38,7 @@
     const TargetRegisterInfo *TRI;
     MachineRegisterInfo  *RegInfo; // Machine register information
     MachineDominatorTree *DT;   // Machine dominator tree
+    MachineLoopInfo *LI;
     AliasAnalysis *AA;
     BitVector AllocatableSet;   // Which physregs are allocatable?
 
@@ -51,7 +53,9 @@
       MachineFunctionPass::getAnalysisUsage(AU);
       AU.addRequired<AliasAnalysis>();
       AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
       AU.addPreserved<MachineDominatorTree>();
+      AU.addPreserved<MachineLoopInfo>();
     }
   private:
     bool ProcessBlock(MachineBasicBlock &MBB);
@@ -102,6 +106,7 @@
   TRI = TM.getRegisterInfo();
   RegInfo = &MF.getRegInfo();
   DT = &getAnalysis<MachineDominatorTree>();
+  LI = &getAnalysis<MachineLoopInfo>();
   AA = &getAnalysis<AliasAnalysis>();
   AllocatableSet = TRI->getAllocatableSet(MF);
 
@@ -126,6 +131,11 @@
   // Can't sink anything out of a block that has less than two successors.
   if (MBB.succ_size() <= 1 || MBB.empty()) return false;
 
+  // Don't bother sinking code out of unreachable blocks. In addition to being
+  // unprofitable, it can also lead to infinite looping, because in an unreachable
+  // loop there may be nowhere to stop.
+  if (!DT->isReachableFromEntry(&MBB)) return false;
+
   bool MadeChange = false;
 
   // Walk the basic block bottom-up.  Remember if we saw a store.
@@ -271,8 +281,29 @@
   // but for now we just punt.
   // FIXME: Split critical edges if not backedges.
   if (SuccToSinkTo->pred_size() > 1) {
-    DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
-    return false;
+    // We cannot sink a load across a critical edge - there may be stores in
+    // other code paths.
+    bool store = true;
+    if (!MI->isSafeToMove(TII, AA, store)) {
+      DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
+      return false;
+    }
+
+    // We don't want to sink across a critical edge if we don't dominate the
+    // successor. We could be introducing calculations to new code paths.
+    if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
+      return false;
+    }
+
+    // Don't sink instructions into a loop.
+    if (LI->isLoopHeader(SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
+      return false;
+    }
+
+    // Otherwise we are OK with sinking along a critical edge.
+    DEBUG(dbgs() << "Sinking along critical edge.\n");
   }
   
   // Determine where to insert into.  Skip phi nodes.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 434a1e8..0b75c55 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -279,7 +279,7 @@
   if (OutFile)
     delete OutFile;
   else if (foundErrors)
-    llvm_report_error("Found "+Twine(foundErrors)+" machine code errors.");
+    report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
 
   // Clean up.
   regsLive.clear();
@@ -351,8 +351,8 @@
 }
 
 // Does iterator point to a and b as the first two elements?
-bool matchPair(MachineBasicBlock::const_succ_iterator i,
-               const MachineBasicBlock *a, const MachineBasicBlock *b) {
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+                      const MachineBasicBlock *a, const MachineBasicBlock *b) {
   if (*i == a)
     return *++i == b;
   if (*i == b)
@@ -470,7 +470,7 @@
   }
 
   regsLive.clear();
-  for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+  for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
          E = MBB->livein_end(); I != E; ++I) {
     if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
       report("MBB live-in list contains non-physical register", MBB);
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
index acb6869..41fc204 100644
--- a/lib/CodeGen/OptimizeExts.cpp
+++ b/lib/CodeGen/OptimizeExts.cpp
@@ -73,6 +73,9 @@
 /// the source, and if the source value is preserved as a sub-register of
 /// the result, then replace all reachable uses of the source with the subreg
 /// of the result.
+/// Do not generate an EXTRACT that is used only in a debug use, as this
+/// changes the code.  Since this code does not currently share EXTRACTs, just
+/// ignore all debug uses.
 bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                                  SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
   bool Changed = false;
@@ -84,17 +87,17 @@
         TargetRegisterInfo::isPhysicalRegister(SrcReg))
       return false;
 
-    MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
-    if (++UI == MRI->use_end())
+    MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
+    if (++UI == MRI->use_nodbg_end())
       // No other uses.
       return false;
 
     // Ok, the source has other uses. See if we can replace the other uses
     // with use of the result of the extension.
     SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
-    UI = MRI->use_begin(DstReg);
-    for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
-         ++UI)
+    UI = MRI->use_nodbg_begin(DstReg);
+    for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+         UI != UE; ++UI)
       ReachedBBs.insert(UI->getParent());
 
     bool ExtendLife = true;
@@ -103,9 +106,9 @@
     // Uses that the result of the instruction can reach.
     SmallVector<MachineOperand*, 8> ExtendedUses;
 
-    UI = MRI->use_begin(SrcReg);
-    for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
-         ++UI) {
+    UI = MRI->use_nodbg_begin(SrcReg);
+    for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+         UI != UE; ++UI) {
       MachineOperand &UseMO = UI.getOperand();
       MachineInstr *UseMI = &*UI;
       if (UseMI == MI)
@@ -147,9 +150,9 @@
       // Look for PHI uses of the extended result, we don't want to extend the
       // liveness of a PHI input. It breaks all kinds of assumptions down
       // stream. A PHI use is expected to be the kill of its source values.
-      UI = MRI->use_begin(DstReg);
-      for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
-           ++UI)
+      UI = MRI->use_nodbg_begin(DstReg);
+      for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+           UI != UE; ++UI)
         if (UI->isPHI())
           PHIBBs.insert(UI->getParent());
 
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 8bbe0a7..f0057ce 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -74,7 +74,7 @@
          E = ImpDefs.end(); I != E; ++I) {
     MachineInstr *DefMI = *I;
     unsigned DefReg = DefMI->getOperand(0).getReg();
-    if (MRI->use_empty(DefReg))
+    if (MRI->use_nodbg_empty(DefReg))
       DefMI->eraseFromParent();
   }
 
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 424181c..79a25ef 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -46,7 +46,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/Statistic.h"
-#include <map>
 #include <set>
 using namespace llvm;
 
@@ -266,6 +265,17 @@
     // Initialize register live-range state for scheduling in this block.
     Scheduler.StartBlock(MBB);
 
+    // FIXME: Temporary workaround for <rdar://problem/7759363>: The post-RA
+    // scheduler has some sort of problem with DebugValue instructions that
+    // causes an assertion in LeaksContext.h to fail occasionally.  Just
+    // remove all those instructions for now.
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ) {
+      MachineInstr *MI = &*I++;
+      if (MI->isDebugValue())
+        MI->eraseFromParent();
+    }
+
     // Schedule each sequence of instructions not interrupted by a label
     // or anything else that effectively needs to shut down scheduling.
     MachineBasicBlock::iterator Current = MBB->end();
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 70e91aa..2d49beb 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -665,7 +665,7 @@
 
 /// ReconstructLiveInterval - Recompute a live interval from scratch.
 void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
-  BumpPtrAllocator& Alloc = LIs->getVNInfoAllocator();
+  VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator();
   
   // Clear the old ranges and valnos;
   LI->clear();
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 138e711..365a84a 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -24,7 +24,6 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/Target/TargetMachine.h"
@@ -57,11 +56,7 @@
   const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
   RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
-
-  // Get MachineModuleInfo so that we can track the construction of the
-  // frame.
-  if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
-    Fn.getFrameInfo()->setMachineModuleInfo(MMI);
+  FrameConstantRegMap.clear();
 
   // Calculate the MaxCallFrameSize and HasCalls variables for the function's
   // frame information. Also eliminates call frame pseudo instructions.
@@ -136,10 +131,10 @@
 /// pseudo instructions.
 void PEI::calculateCallsInformation(MachineFunction &Fn) {
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
-  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
 
   unsigned MaxCallFrameSize = 0;
-  bool HasCalls = FFI->hasCalls();
+  bool HasCalls = MFI->hasCalls();
 
   // Get the function call frame set-up and tear-down instruction opcode
   int FrameSetupOpcode   = RegInfo->getCallFrameSetupOpcode();
@@ -167,8 +162,8 @@
         HasCalls = true;
       }
 
-  FFI->setHasCalls(HasCalls);
-  FFI->setMaxCallFrameSize(MaxCallFrameSize);
+  MFI->setHasCalls(HasCalls);
+  MFI->setMaxCallFrameSize(MaxCallFrameSize);
 
   for (std::vector<MachineBasicBlock::iterator>::iterator
          i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
@@ -189,7 +184,7 @@
 void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
   const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
-  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
 
   // Get the callee saved register list...
   const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
@@ -260,19 +255,19 @@
       // the TargetRegisterClass if the stack alignment is smaller. Use the
       // min.
       Align = std::min(Align, StackAlign);
-      FrameIdx = FFI->CreateStackObject(RC->getSize(), Align, true);
+      FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
       if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
       if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
     } else {
       // Spill it to the stack where we must.
-      FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
+      FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
                                         true, false);
     }
 
     I->setFrameIdx(FrameIdx);
   }
 
-  FFI->setCalleeSavedInfo(CSI);
+  MFI->setCalleeSavedInfo(CSI);
 }
 
 /// insertCSRSpillsAndRestores - Insert spill and restore code for
@@ -280,10 +275,10 @@
 ///
 void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
   // Get callee saved register information.
-  MachineFrameInfo *FFI = Fn.getFrameInfo();
-  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
 
-  FFI->setCalleeSavedInfoValid(true);
+  MFI->setCalleeSavedInfoValid(true);
 
   // Early exit if no callee saved registers are modified!
   if (CSI.empty())
@@ -441,14 +436,14 @@
 
 /// AdjustStackOffset - Helper function used to adjust the stack frame offset.
 static inline void
-AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
                   bool StackGrowsDown, int64_t &Offset,
                   unsigned &MaxAlign) {
   // If the stack grows down, add the object size to find the lowest address.
   if (StackGrowsDown)
-    Offset += FFI->getObjectSize(FrameIdx);
+    Offset += MFI->getObjectSize(FrameIdx);
 
-  unsigned Align = FFI->getObjectAlignment(FrameIdx);
+  unsigned Align = MFI->getObjectAlignment(FrameIdx);
 
   // If the alignment of this object is greater than that of the stack, then
   // increase the stack alignment to match.
@@ -458,10 +453,10 @@
   Offset = (Offset + Align - 1) / Align * Align;
 
   if (StackGrowsDown) {
-    FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+    MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
   } else {
-    FFI->setObjectOffset(FrameIdx, Offset);
-    Offset += FFI->getObjectSize(FrameIdx);
+    MFI->setObjectOffset(FrameIdx, Offset);
+    Offset += MFI->getObjectSize(FrameIdx);
   }
 }
 
@@ -475,7 +470,7 @@
     TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
 
   // Loop over all of the stack objects, assigning sequential addresses...
-  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
 
   // Start at the beginning of the local area.
   // The Offset is the distance from the stack top in the direction
@@ -492,17 +487,17 @@
   // We currently don't support filling in holes in between fixed sized
   // objects, so we adjust 'Offset' to point to the end of last fixed sized
   // preallocated object.
-  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
     int64_t FixedOff;
     if (StackGrowsDown) {
       // The maximum distance from the stack pointer is at lower address of
       // the object -- which is given by offset. For down growing stack
       // the offset is negative, so we negate the offset to get the distance.
-      FixedOff = -FFI->getObjectOffset(i);
+      FixedOff = -MFI->getObjectOffset(i);
     } else {
       // The maximum distance from the start pointer is at the upper
       // address of the object.
-      FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
+      FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
     }
     if (FixedOff > Offset) Offset = FixedOff;
   }
@@ -513,27 +508,27 @@
     for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
       // If stack grows down, we need to add size of find the lowest
       // address of the object.
-      Offset += FFI->getObjectSize(i);
+      Offset += MFI->getObjectSize(i);
 
-      unsigned Align = FFI->getObjectAlignment(i);
+      unsigned Align = MFI->getObjectAlignment(i);
       // Adjust to alignment boundary
       Offset = (Offset+Align-1)/Align*Align;
 
-      FFI->setObjectOffset(i, -Offset);        // Set the computed offset
+      MFI->setObjectOffset(i, -Offset);        // Set the computed offset
     }
   } else {
     int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
     for (int i = MaxCSFI; i >= MinCSFI ; --i) {
-      unsigned Align = FFI->getObjectAlignment(i);
+      unsigned Align = MFI->getObjectAlignment(i);
       // Adjust to alignment boundary
       Offset = (Offset+Align-1)/Align*Align;
 
-      FFI->setObjectOffset(i, Offset);
-      Offset += FFI->getObjectSize(i);
+      MFI->setObjectOffset(i, Offset);
+      Offset += MFI->getObjectSize(i);
     }
   }
 
-  unsigned MaxAlign = FFI->getMaxAlignment();
+  unsigned MaxAlign = MFI->getMaxAlignment();
 
   // Make sure the special register scavenging spill slot is closest to the
   // frame pointer if a frame pointer is required.
@@ -541,28 +536,28 @@
   if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
     int SFI = RS->getScavengingFrameIndex();
     if (SFI >= 0)
-      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+      AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
   }
 
   // Make sure that the stack protector comes before the local variables on the
   // stack.
-  if (FFI->getStackProtectorIndex() >= 0)
-    AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
+  if (MFI->getStackProtectorIndex() >= 0)
+    AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
                       Offset, MaxAlign);
 
   // Then assign frame offsets to stack objects that are not used to spill
   // callee saved registers.
-  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
     if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
       continue;
     if (RS && (int)i == RS->getScavengingFrameIndex())
       continue;
-    if (FFI->isDeadObjectIndex(i))
+    if (MFI->isDeadObjectIndex(i))
       continue;
-    if (FFI->getStackProtectorIndex() == (int)i)
+    if (MFI->getStackProtectorIndex() == (int)i)
       continue;
 
-    AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
+    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
   }
 
   // Make sure the special register scavenging spill slot is closest to the
@@ -570,15 +565,15 @@
   if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
     int SFI = RS->getScavengingFrameIndex();
     if (SFI >= 0)
-      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+      AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
   }
 
   if (!RegInfo->targetHandlesStackFrameRounding()) {
     // If we have reserved argument space for call sites in the function
     // immediately on entry to the current function, count it as part of the
     // overall stack size.
-    if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
-      Offset += FFI->getMaxCallFrameSize();
+    if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
+      Offset += MFI->getMaxCallFrameSize();
 
     // Round up the size to a multiple of the alignment.  If the function has
     // any calls or alloca's, align to the target's StackAlignment value to
@@ -586,8 +581,8 @@
     // otherwise, for leaf functions, align to the TransientStackAlignment
     // value.
     unsigned StackAlign;
-    if (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
-        (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0))
+    if (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
+        (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
       StackAlign = TFI.getStackAlignment();
     else
       StackAlign = TFI.getTransientStackAlignment();
@@ -599,7 +594,7 @@
   }
 
   // Update frame info to pretend that this is part of the stack...
-  FFI->setStackSize(Offset - LocalAreaOffset);
+  MFI->setStackSize(Offset - LocalAreaOffset);
 }
 
 
@@ -685,7 +680,7 @@
           // If this instruction has a FrameIndex operand, we need to
           // use that target machine register info object to eliminate
           // it.
-          int Value;
+          TargetRegisterInfo::FrameIndexValue Value;
           unsigned VReg =
             TRI.eliminateFrameIndex(MI, SPAdj, &Value,
                                     FrameIndexVirtualScavenging ?  NULL : RS);
@@ -764,12 +759,12 @@
     unsigned CurrentVirtReg = 0;
     unsigned CurrentScratchReg = 0;
     bool havePrevValue = false;
-    int PrevValue = 0;
+    TargetRegisterInfo::FrameIndexValue PrevValue(0,0);
+    TargetRegisterInfo::FrameIndexValue Value(0,0);
     MachineInstr *PrevLastUseMI = NULL;
     unsigned PrevLastUseOp = 0;
     bool trackingCurrentValue = false;
     int SPAdj = 0;
-    int Value = 0;
 
     // The instruction stream may change in the loop, so check BB->end()
     // directly.
@@ -826,8 +821,11 @@
             if (trackingCurrentValue) {
               SPAdj = (*Entry).second.second;
               Value = (*Entry).second.first;
-            } else
-              SPAdj = Value = 0;
+            } else {
+              SPAdj = 0;
+              Value.first = 0;
+              Value.second = 0;
+            }
 
             // If the scratch register from the last allocation is still
             // available, see if the value matches. If it does, just re-use it.
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index 931f1eb..aa95773 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -102,7 +102,8 @@
     // When using the scavenger post-pass to resolve frame reference
     // materialization registers, maintain a map of the registers to
     // the constant value and SP adjustment associated with it.
-    typedef std::pair<int, int> FrameConstantEntry;
+    typedef std::pair<TargetRegisterInfo::FrameIndexValue, int>
+      FrameConstantEntry;
     DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap;
 
 #ifndef NDEBUG
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
new file mode 100644
index 0000000..2caf1df
--- /dev/null
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -0,0 +1,932 @@
+//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+static RegisterRegAlloc
+  fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
+
+namespace {
+  class RAFast : public MachineFunctionPass {
+  public:
+    static char ID;
+    RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
+  private:
+    const TargetMachine *TM;
+    MachineFunction *MF;
+    const TargetRegisterInfo *TRI;
+    const TargetInstrInfo *TII;
+
+    // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+    // values are spilled.
+    IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+    // Virt2PhysRegMap - This map contains entries for each virtual register
+    // that is currently available in a physical register.
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+    unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+      return Virt2PhysRegMap[VirtReg];
+    }
+
+    // PhysRegsUsed - This array is effectively a map, containing entries for
+    // each physical register that currently has a value (ie, it is in
+    // Virt2PhysRegMap).  The value mapped to is the virtual register
+    // corresponding to the physical register (the inverse of the
+    // Virt2PhysRegMap), or 0.  The value is set to 0 if this register is pinned
+    // because it is used by a future instruction, and to -2 if it is not
+    // allocatable.  If the entry for a physical register is -1, then the
+    // physical register is "not in the map".
+    //
+    std::vector<int> PhysRegsUsed;
+
+    // UsedInInstr - BitVector of physregs that are used in the current
+    // instruction, and so cannot be allocated.
+    BitVector UsedInInstr;
+
+    // Virt2LastUseMap - This maps each virtual register to its last use
+    // (MachineInstr*, operand index pair).
+    IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
+    Virt2LastUseMap;
+
+    std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
+      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      return Virt2LastUseMap[Reg];
+    }
+
+    // VirtRegModified - This bitset contains information about which virtual
+    // registers need to be spilled back to memory when their registers are
+    // scavenged.  If a virtual register has simply been rematerialized, there
+    // is no reason to spill it to memory when we need the register back.
+    //
+    BitVector VirtRegModified;
+
+    // UsedInMultipleBlocks - Tracks whether a particular register is used in
+    // more than one block.
+    BitVector UsedInMultipleBlocks;
+
+    void markVirtRegModified(unsigned Reg, bool Val = true) {
+      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      Reg -= TargetRegisterInfo::FirstVirtualRegister;
+      if (Val)
+        VirtRegModified.set(Reg);
+      else
+        VirtRegModified.reset(Reg);
+    }
+
+    bool isVirtRegModified(unsigned Reg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      assert(Reg - TargetRegisterInfo::FirstVirtualRegister <
+             VirtRegModified.size() && "Illegal virtual register!");
+      return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+    }
+
+  public:
+    virtual const char *getPassName() const {
+      return "Fast Register Allocator";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<LiveVariables>();
+      AU.addRequiredID(PHIEliminationID);
+      AU.addRequiredID(TwoAddressInstructionPassID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// runOnMachineFunction - Register allocate the whole function
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    /// AllocateBasicBlock - Register allocate the specified basic block.
+    void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+
+    /// areRegsEqual - This method returns true if the specified registers are
+    /// related to each other.  To do this, it checks to see if they are equal
+    /// or if the first register is in the alias set of the second register.
+    ///
+    bool areRegsEqual(unsigned R1, unsigned R2) const {
+      if (R1 == R2) return true;
+      for (const unsigned *AliasSet = TRI->getAliasSet(R2);
+           *AliasSet; ++AliasSet) {
+        if (*AliasSet == R1) return true;
+      }
+      return false;
+    }
+
+    /// getStackSpaceFor - This returns the frame index of the specified virtual
+    /// register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// removePhysReg - This method marks the specified physical register as no
+    /// longer being in use.
+    ///
+    void removePhysReg(unsigned PhysReg);
+
+    /// spillVirtReg - This method spills the value specified by PhysReg into
+    /// the virtual register slot specified by VirtReg.  It then updates the RA
+    /// data structures to indicate the fact that PhysReg is now available.
+    ///
+    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                      unsigned VirtReg, unsigned PhysReg);
+
+    /// spillPhysReg - This method spills the specified physical register into
+    /// the virtual register slot associated with it.  If OnlyVirtRegs is set to
+    /// true, then the request is ignored if the physical register does not
+    /// contain a virtual register.
+    ///
+    void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                      unsigned PhysReg, bool OnlyVirtRegs = false);
+
+    /// assignVirtToPhysReg - This method updates local state so that we know
+    /// that PhysReg is the proper container for VirtReg now.  The physical
+    /// register must not be used for anything else when this is called.
+    ///
+    void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+    /// isPhysRegAvailable - Return true if the specified physical register is
+    /// free and available for use.  This also includes checking to see if
+    /// aliased registers are all free...
+    ///
+    bool isPhysRegAvailable(unsigned PhysReg) const;
+
+    /// isPhysRegSpillable - Can PhysReg be freed by spilling?
+    bool isPhysRegSpillable(unsigned PhysReg) const;
+
+    /// getFreeReg - Look to see if there is a free register available in the
+    /// specified register class.  If not, return 0.
+    ///
+    unsigned getFreeReg(const TargetRegisterClass *RC);
+
+    /// getReg - Find a physical register to hold the specified virtual
+    /// register.  If all compatible physical registers are used, this method
+    /// spills the last used virtual register to the stack, and uses that
+    /// register. If NoFree is true, that means the caller knows there isn't
+    /// a free register, do not call getFreeReg().
+    unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                    unsigned VirtReg, bool NoFree = false);
+
+    /// reloadVirtReg - This method transforms the specified virtual
+    /// register use to refer to a physical register.  This method may do this
+    /// in one of several ways: if the register is available in a physical
+    /// register already, it uses that physical register.  If the value is not
+    /// in a physical register, and if there are physical registers available,
+    /// it loads it into a register: PhysReg if that is an available physical
+    /// register, otherwise any physical register of the right class.
+    /// If register pressure is high, and it is possible, it tries to fold the
+    /// load of the virtual register into the instruction itself.  It avoids
+    /// doing this if register pressure is low to improve the chance that
+    /// subsequent instructions can use the reloaded value.  This method
+    /// returns the modified instruction.
+    ///
+    MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                unsigned OpNum, SmallSet<unsigned, 4> &RRegs,
+                                unsigned PhysReg);
+
+    void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+                       unsigned PhysReg);
+  };
+  char RAFast::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+  // Find the location Reg would belong...
+  int SS = StackSlotForVirtReg[VirtReg];
+  if (SS != -1)
+    return SS;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+                                                            RC->getAlignment());
+
+  // Assign the slot.
+  StackSlotForVirtReg[VirtReg] = FrameIdx;
+  return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RAFast::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1;      // PhyReg no longer used
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg.  It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RAFast::spillVirtReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I,
+                           unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " Must not have appropriate kill for the register or use exists beyond"
+         " the intended one.");
+  DEBUG(dbgs() << "  Spilling register " << TRI->getName(PhysReg)
+               << " containing %reg" << VirtReg);
+
+  if (!isVirtRegModified(VirtReg)) {
+    DEBUG(dbgs() << " which has not been modified, so no store necessary!");
+    std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
+    if (LastUse.first)
+      LastUse.first->getOperand(LastUse.second).setIsKill();
+  } else {
+    // Otherwise, there is a virtual register corresponding to this physical
+    // register.  We only need to spill it into its stack slot if it has been
+    // modified.
+    const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DEBUG(dbgs() << " to stack slot #" << FrameIndex);
+    // If the instruction reads the register that's spilled, (e.g. this can
+    // happen if it is a move to a physical register), then the spill
+    // instruction is not a kill.
+    bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
+    TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+    ++NumStores;   // Update statistics
+  }
+
+  getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
+
+  DEBUG(dbgs() << '\n');
+  removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it.  If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                           unsigned PhysReg, bool OnlyVirtRegs) {
+  if (PhysRegsUsed[PhysReg] != -1) {            // Only spill it if it's used!
+    assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+    if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+      spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+    return;
+  }
+
+  // If the selected register aliases any other registers, we must make
+  // sure that one of the aliases isn't alive.
+  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+       *AliasSet; ++AliasSet) {
+    if (PhysRegsUsed[*AliasSet] == -1 ||     // Spill aliased register.
+        PhysRegsUsed[*AliasSet] == -2)       // If allocatable.
+      continue;
+
+    if (PhysRegsUsed[*AliasSet])
+      spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+  }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now.  The physical
+/// register must not be used for anything else when this is called.
+///
+void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+  assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+  // Update information to note the fact that this register was just used, and
+  // it holds VirtReg.
+  PhysRegsUsed[PhysReg] = VirtReg;
+  getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+  UsedInInstr.set(PhysReg);
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use.  This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RAFast::isPhysRegAvailable(unsigned PhysReg) const {
+  if (PhysRegsUsed[PhysReg] != -1) return false;
+
+  // If the selected register aliases any other allocated registers, it is
+  // not free!
+  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+       *AliasSet; ++AliasSet)
+    if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
+      return false;                    // Can't use this reg then.
+  return true;
+}
+
+/// isPhysRegSpillable - Return true if the specified physical register can be
+/// spilled for use in the current instruction.
+///
+bool RAFast::isPhysRegSpillable(unsigned PhysReg) const {
+  // Test that PhysReg and all aliases are either free or assigned to a VirtReg
+  // that is not used in the instruction.
+  if (PhysRegsUsed[PhysReg] != -1 &&
+      (PhysRegsUsed[PhysReg] <= 0 || UsedInInstr.test(PhysReg)))
+    return false;
+
+  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+       *AliasSet; ++AliasSet)
+    if (PhysRegsUsed[*AliasSet] != -1 &&
+        (PhysRegsUsed[*AliasSet] <= 0 || UsedInInstr.test(*AliasSet)))
+      return false;
+  return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class.  If not, return 0.
+///
+unsigned RAFast::getFreeReg(const TargetRegisterClass *RC) {
+  // Get iterators defining the range of registers that are valid to allocate in
+  // this class, which also specifies the preferred allocation order.
+  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+  for (; RI != RE; ++RI)
+    if (isPhysRegAvailable(*RI)) {       // Is reg unused?
+      assert(*RI != 0 && "Cannot use register!");
+      return *RI; // Found an unused register!
+    }
+  return 0;
+}
+
+
+/// getReg - Find a physical register to hold the specified virtual
+/// register.  If all compatible physical registers are used, this method spills
+/// the last used virtual register to the stack, and uses that register.
+///
+unsigned RAFast::getReg(MachineBasicBlock &MBB, MachineInstr *I,
+                         unsigned VirtReg, bool NoFree) {
+  const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+
+  // First check to see if we have a free register of the requested type...
+  unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
+
+  if (PhysReg != 0) {
+    // Assign the register.
+    assignVirtToPhysReg(VirtReg, PhysReg);
+    return PhysReg;
+  }
+
+  // If we didn't find an unused register, scavenge one now! Don't be fancy,
+  // just grab the first possible register.
+  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+  for (; RI != RE; ++RI)
+    if (isPhysRegSpillable(*RI)) {
+      PhysReg = *RI;
+      break;
+    }
+
+  assert(PhysReg && "Physical register not assigned!?!?");
+  spillPhysReg(MBB, I, PhysReg);
+  assignVirtToPhysReg(VirtReg, PhysReg);
+  return PhysReg;
+}
+
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register.  This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register.  If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register: PhysReg if that is an available physical register, otherwise any
+/// register.  If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself.  It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value.  This method returns
+/// the modified instruction.
+///
+MachineInstr *RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                     unsigned OpNum,
+                                     SmallSet<unsigned, 4> &ReloadedRegs,
+                                     unsigned PhysReg) {
+  unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+  // If the virtual register is already available, just update the instruction
+  // and return.
+  if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+    MI->getOperand(OpNum).setReg(PR);  // Assign the input register
+    if (!MI->isDebugValue()) {
+      // Do not do these for DBG_VALUE as they can affect codegen.
+      UsedInInstr.set(PR);
+      getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+    }
+    return MI;
+  }
+
+  // Otherwise, we need to fold it into the current instruction, or reload it.
+  // If we have registers available to hold the value, use them.
+  const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+  // If we already have a PhysReg (this happens when the instruction is a
+  // reg-to-reg copy with a PhysReg destination) use that.
+  if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) ||
+      !isPhysRegAvailable(PhysReg))
+    PhysReg = getFreeReg(RC);
+  int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+  if (PhysReg) {   // Register is available, allocate it!
+    assignVirtToPhysReg(VirtReg, PhysReg);
+  } else {         // No registers available.
+    // Force some poor hapless value out of the register file to
+    // make room for the new register, and reload it.
+    PhysReg = getReg(MBB, MI, VirtReg, true);
+  }
+
+  markVirtRegModified(VirtReg, false);   // Note that this reg was just reloaded
+
+  DEBUG(dbgs() << "  Reloading %reg" << VirtReg << " into "
+               << TRI->getName(PhysReg) << "\n");
+
+  // Add move instruction(s)
+  TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+  ++NumLoads;    // Update statistics
+
+  MF->getRegInfo().setPhysRegUsed(PhysReg);
+  MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
+  getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+
+  if (!ReloadedRegs.insert(PhysReg)) {
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Ran out of registers during register allocation!";
+    if (MI->isInlineAsm()) {
+      Msg << "\nPlease check your inline asm statement for invalid "
+           << "constraints:\n";
+      MI->print(Msg, TM);
+    }
+    report_fatal_error(Msg.str());
+  }
+  for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+       *SubRegs; ++SubRegs) {
+    if (ReloadedRegs.insert(*SubRegs)) continue;
+
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Ran out of registers during register allocation!";
+    if (MI->isInlineAsm()) {
+      Msg << "\nPlease check your inline asm statement for invalid "
+           << "constraints:\n";
+      MI->print(Msg, TM);
+    }
+    report_fatal_error(Msg.str());
+  }
+
+  return MI;
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+        MO.isDef() && !MO.isDead())
+      return true;
+  }
+  return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+        !MO.isDef() && MO.isKill())
+      return true;
+  }
+  return false;
+}
+
+void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) {
+  // loop over each instruction
+  MachineBasicBlock::iterator MII = MBB.begin();
+
+  DEBUG({
+      const BasicBlock *LBB = MBB.getBasicBlock();
+      if (LBB)
+        dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
+    });
+
+  // Add live-in registers as active.
+  for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
+         E = MBB.livein_end(); I != E; ++I) {
+    unsigned Reg = *I;
+    MF->getRegInfo().setPhysRegUsed(Reg);
+    PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         *SubRegs; ++SubRegs) {
+      if (PhysRegsUsed[*SubRegs] == -2) continue;
+      PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+      MF->getRegInfo().setPhysRegUsed(*SubRegs);
+    }
+  }
+
+  // Otherwise, sequentially allocate each instruction in the MBB.
+  while (MII != MBB.end()) {
+    MachineInstr *MI = MII++;
+    const TargetInstrDesc &TID = MI->getDesc();
+    DEBUG({
+        dbgs() << "\nStarting RegAlloc of: " << *MI;
+        dbgs() << "  Regs have values: ";
+        for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
+          if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+            dbgs() << "[" << TRI->getName(i)
+                   << ",%reg" << PhysRegsUsed[i] << "] ";
+        dbgs() << '\n';
+      });
+
+    // Track registers used by instruction.
+    UsedInInstr.reset();
+
+    // Determine whether this is a copy instruction.  The cases where the
+    // source or destination are phys regs are handled specially.
+    unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
+    unsigned SrcCopyPhysReg = 0U;
+    bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
+                                   SrcCopySubReg, DstCopySubReg);
+    if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
+      SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
+
+    // Loop over the implicit uses, making sure they don't get reallocated.
+    if (TID.ImplicitUses) {
+      for (const unsigned *ImplicitUses = TID.ImplicitUses;
+           *ImplicitUses; ++ImplicitUses)
+        UsedInInstr.set(*ImplicitUses);
+    }
+
+    SmallVector<unsigned, 8> Kills;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isKill()) continue;
+
+      if (!MO.isImplicit())
+        Kills.push_back(MO.getReg());
+      else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+        // These are extra physical register kills when a sub-register
+        // is defined (def of a sub-register is a read/mod/write of the
+        // larger registers). Ignore.
+        Kills.push_back(MO.getReg());
+    }
+
+    // If any physical regs are earlyclobber, spill any value they might
+    // have in them, then mark them unallocatable.
+    // If any virtual regs are earlyclobber, allocate them now (before
+    // freeing inputs that are killed).
+    if (MI->isInlineAsm()) {
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() ||
+            !MO.getReg())
+          continue;
+
+        if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+          unsigned DestVirtReg = MO.getReg();
+          unsigned DestPhysReg;
+
+          // If DestVirtReg already has a value, use it.
+          if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+            DestPhysReg = getReg(MBB, MI, DestVirtReg);
+          MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+          markVirtRegModified(DestVirtReg);
+          getVirtRegLastUse(DestVirtReg) =
+                 std::make_pair((MachineInstr*)0, 0);
+          DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
+                       << " to %reg" << DestVirtReg << "\n");
+          MO.setReg(DestPhysReg);  // Assign the earlyclobber register
+        } else {
+          unsigned Reg = MO.getReg();
+          if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
+          // These are extra physical register defs when a sub-register
+          // is defined (def of a sub-register is a read/mod/write of the
+          // larger registers). Ignore.
+          if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+          MF->getRegInfo().setPhysRegUsed(Reg);
+          spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+          PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+
+          for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+               *SubRegs; ++SubRegs) {
+            if (PhysRegsUsed[*SubRegs] == -2) continue;
+            MF->getRegInfo().setPhysRegUsed(*SubRegs);
+            PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+          }
+        }
+      }
+    }
+
+    // If a DBG_VALUE says something is located in a spilled register,
+    // change the DBG_VALUE to be undef, which prevents the register
+    // from being reloaded here.  Doing that would change the generated
+    // code, unless another use immediately follows this instruction.
+    if (MI->isDebugValue() &&
+        MI->getNumOperands()==3 && MI->getOperand(0).isReg()) {
+      unsigned VirtReg = MI->getOperand(0).getReg();
+      if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+          !getVirt2PhysRegMapSlot(VirtReg))
+        MI->getOperand(0).setReg(0U);
+    }
+
+    // Get the used operands into registers.  This has the potential to spill
+    // incoming values if we are out of registers.  Note that we completely
+    // ignore physical register uses here.  We assume that if an explicit
+    // physical register is referenced by the instruction, that it is guaranteed
+    // to be live-in, or the input is badly hosed.
+    //
+    SmallSet<unsigned, 4> ReloadedRegs;
+    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      // here we are looking for only used operands (never def&use)
+      if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        MI = reloadVirtReg(MBB, MI, i, ReloadedRegs,
+                           isCopy ? DstCopyReg : 0);
+    }
+
+    // If this instruction is the last user of this register, kill the
+    // value, freeing the register being used, so it doesn't need to be
+    // spilled to memory.
+    //
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+      unsigned VirtReg = Kills[i];
+      unsigned PhysReg = VirtReg;
+      if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+        // If the virtual register was never materialized into a register, it
+        // might not be in the map, but it won't hurt to zero it out anyway.
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      } else {
+        assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+               "Silently clearing a virtual register?");
+      }
+
+      if (!PhysReg) continue;
+
+      DEBUG(dbgs() << "  Last use of " << TRI->getName(PhysReg)
+                   << "[%reg" << VirtReg <<"], removing it from live set\n");
+      removePhysReg(PhysReg);
+      for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+           *SubRegs; ++SubRegs) {
+        if (PhysRegsUsed[*SubRegs] != -2) {
+          DEBUG(dbgs()  << "  Last use of "
+                        << TRI->getName(*SubRegs) << "[%reg" << VirtReg
+                        <<"], removing it from live set\n");
+          removePhysReg(*SubRegs);
+        }
+      }
+    }
+
+    // Track registers defined by instruction.
+    UsedInInstr.reset();
+
+    // Loop over all of the operands of the instruction, spilling registers that
+    // are defined, and marking explicit destinations in the PhysRegsUsed map.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() ||
+          MO.isEarlyClobber() ||
+          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+        continue;
+
+      unsigned Reg = MO.getReg();
+      if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
+      // These are extra physical register defs when a sub-register
+      // is defined (def of a sub-register is a read/mod/write of the
+      // larger registers). Ignore.
+      if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+      MF->getRegInfo().setPhysRegUsed(Reg);
+      spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+      PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+
+      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+           *SubRegs; ++SubRegs) {
+        if (PhysRegsUsed[*SubRegs] == -2) continue;
+
+        MF->getRegInfo().setPhysRegUsed(*SubRegs);
+        PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+      }
+    }
+
+    // Loop over the implicit defs, spilling them as well.
+    if (TID.ImplicitDefs) {
+      for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+           *ImplicitDefs; ++ImplicitDefs) {
+        unsigned Reg = *ImplicitDefs;
+        if (PhysRegsUsed[Reg] != -2) {
+          spillPhysReg(MBB, MI, Reg, true);
+          PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+        }
+        MF->getRegInfo().setPhysRegUsed(Reg);
+        for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+             *SubRegs; ++SubRegs) {
+          if (PhysRegsUsed[*SubRegs] == -2) continue;
+
+          PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+          MF->getRegInfo().setPhysRegUsed(*SubRegs);
+        }
+      }
+    }
+
+    SmallVector<unsigned, 8> DeadDefs;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isDead())
+        DeadDefs.push_back(MO.getReg());
+    }
+
+    // Okay, we have allocated all of the source operands and spilled any values
+    // that would be destroyed by defs of this instruction.  Loop over the
+    // explicit defs and assign them to a register, spilling incoming values if
+    // we need to scavenge a register.
+    //
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef() || !MO.getReg() ||
+          MO.isEarlyClobber() ||
+          !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        continue;
+
+      unsigned DestVirtReg = MO.getReg();
+      unsigned DestPhysReg;
+
+      // If DestVirtReg already has a value, use it.
+      if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
+        // If this is a copy try to reuse the input as the output;
+        // that will make the copy go away.
+        // If this is a copy, the source reg is a phys reg, and
+        // that reg is available, use that phys reg for DestPhysReg.
+        // If this is a copy, the source reg is a virtual reg, and
+        // the phys reg that was assigned to that virtual reg is now
+        // available, use that phys reg for DestPhysReg.  (If it's now
+        // available that means this was the last use of the source.)
+        if (isCopy &&
+            TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
+            isPhysRegAvailable(SrcCopyReg)) {
+          DestPhysReg = SrcCopyReg;
+          assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+        } else if (isCopy &&
+            TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
+            SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
+            MF->getRegInfo().getRegClass(DestVirtReg)->
+                             contains(SrcCopyPhysReg)) {
+          DestPhysReg = SrcCopyPhysReg;
+          assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+        } else
+          DestPhysReg = getReg(MBB, MI, DestVirtReg);
+      }
+      MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+      markVirtRegModified(DestVirtReg);
+      getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
+      DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
+                   << " to %reg" << DestVirtReg << "\n");
+      MO.setReg(DestPhysReg);  // Assign the output register
+      UsedInInstr.set(DestPhysReg);
+    }
+
+    // If this instruction defines any registers that are immediately dead,
+    // kill them now.
+    //
+    for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+      unsigned VirtReg = DeadDefs[i];
+      unsigned PhysReg = VirtReg;
+      if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        assert(PhysReg != 0);
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      } else if (!PhysReg)
+        continue;
+
+      DEBUG(dbgs()  << "  Register " << TRI->getName(PhysReg)
+                    << " [%reg" << VirtReg
+                    << "] is never used, removing it from live set\n");
+      removePhysReg(PhysReg);
+      for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+           *AliasSet; ++AliasSet) {
+        if (PhysRegsUsed[*AliasSet] != -2) {
+          DEBUG(dbgs()  << "  Register " << TRI->getName(*AliasSet)
+                        << " [%reg" << *AliasSet
+                        << "] is never used, removing it from live set\n");
+          removePhysReg(*AliasSet);
+        }
+      }
+    }
+
+    // Finally, if this is a noop copy instruction, zap it.  (Except that if
+    // the copy is dead, it must be kept to avoid messing up liveness info for
+    // the register scavenger.  See pr4100.)
+    if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
+                         SrcCopySubReg, DstCopySubReg) &&
+        SrcCopyReg == DstCopyReg && DeadDefs.empty())
+      MBB.erase(MI);
+  }
+
+  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+  // Spill all physical registers holding virtual registers now.
+  for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+    if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
+      if (unsigned VirtReg = PhysRegsUsed[i])
+        spillVirtReg(MBB, MI, VirtReg, i);
+      else
+        removePhysReg(i);
+    }
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+  DEBUG(dbgs() << "Machine Function\n");
+  MF = &Fn;
+  TM = &Fn.getTarget();
+  TRI = TM->getRegisterInfo();
+  TII = TM->getInstrInfo();
+
+  PhysRegsUsed.assign(TRI->getNumRegs(), -1);
+  UsedInInstr.resize(TRI->getNumRegs());
+
+  // At various places we want to efficiently check to see whether a register
+  // is allocatable.  To handle this, we mark all unallocatable registers as
+  // being pinned down, permanently.
+  {
+    BitVector Allocable = TRI->getAllocatableSet(Fn);
+    for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+      if (!Allocable[i])
+        PhysRegsUsed[i] = -2;  // Mark the reg unallocable.
+  }
+
+  // initialize the virtual->physical register map to have a 'null'
+  // mapping for all virtual registers
+  unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+  StackSlotForVirtReg.grow(LastVirtReg);
+  Virt2PhysRegMap.grow(LastVirtReg);
+  Virt2LastUseMap.grow(LastVirtReg);
+  VirtRegModified.resize(LastVirtReg+1 -
+                         TargetRegisterInfo::FirstVirtualRegister);
+  UsedInMultipleBlocks.resize(LastVirtReg+1 -
+                              TargetRegisterInfo::FirstVirtualRegister);
+
+  // Loop over all of the basic blocks, eliminating virtual register references
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB)
+    AllocateBasicBlock(*MBB);
+
+  StackSlotForVirtReg.clear();
+  PhysRegsUsed.clear();
+  VirtRegModified.clear();
+  UsedInMultipleBlocks.clear();
+  Virt2PhysRegMap.clear();
+  Virt2LastUseMap.clear();
+  return true;
+}
+
+FunctionPass *llvm::createFastRegisterAllocator() {
+  return new RAFast();
+}
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 5c5a394..6c8fc0c 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -1177,7 +1177,7 @@
         assignRegOrStackSlotAtInterval(cur);
       } else {
         assert(false && "Ran out of registers during register allocation!");
-        llvm_report_error("Ran out of registers during register allocation!");
+        report_fatal_error("Ran out of registers during register allocation!");
       }
       return;
     }
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 04303cf..1885fab 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -118,8 +118,8 @@
 
     bool isVirtRegModified(unsigned Reg) const {
       assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
-      assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
-             && "Illegal virtual register!");
+      assert(Reg - TargetRegisterInfo::FirstVirtualRegister <
+             VirtRegModified.size() && "Illegal virtual register!");
       return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
     }
 
@@ -135,15 +135,16 @@
       if (PhysRegsUseOrder.empty() ||
           PhysRegsUseOrder.back() == Reg) return;  // Already most recently used
 
-      for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i)
-        if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) {
-          unsigned RegMatch = PhysRegsUseOrder[i-1];       // remove from middle
-          PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
-          // Add it to the end of the list
-          PhysRegsUseOrder.push_back(RegMatch);
-          if (RegMatch == Reg)
-            return;    // Found an exact match, exit early
-        }
+      for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) {
+        unsigned RegMatch = PhysRegsUseOrder[i-1];       // remove from middle
+        if (!areRegsEqual(Reg, RegMatch)) continue;
+        
+        PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
+        // Add it to the end of the list
+        PhysRegsUseOrder.push_back(RegMatch);
+        if (RegMatch == Reg)
+          return;    // Found an exact match, exit early
+      }
     }
 
   public:
@@ -267,7 +268,7 @@
   int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
                                                             RC->getAlignment());
 
-  // Assign the slot...
+  // Assign the slot.
   StackSlotForVirtReg[VirtReg] = FrameIdx;
   return FrameIdx;
 }
@@ -337,15 +338,19 @@
     assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
     if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
       spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
-  } else {
-    // If the selected register aliases any other registers, we must make
-    // sure that one of the aliases isn't alive.
-    for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
-         *AliasSet; ++AliasSet)
-      if (PhysRegsUsed[*AliasSet] != -1 &&     // Spill aliased register.
-          PhysRegsUsed[*AliasSet] != -2)       // If allocatable.
-          if (PhysRegsUsed[*AliasSet])
-            spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+    return;
+  }
+  
+  // If the selected register aliases any other registers, we must make
+  // sure that one of the aliases isn't alive.
+  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+       *AliasSet; ++AliasSet) {
+    if (PhysRegsUsed[*AliasSet] == -1 ||     // Spill aliased register.
+        PhysRegsUsed[*AliasSet] == -2)       // If allocatable.
+      continue;
+  
+    if (PhysRegsUsed[*AliasSet])
+      spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
   }
 }
 
@@ -410,59 +415,64 @@
   // First check to see if we have a free register of the requested type...
   unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
 
+  if (PhysReg != 0) {
+    // Assign the register.
+    assignVirtToPhysReg(VirtReg, PhysReg);
+    return PhysReg;
+  }    
+    
   // If we didn't find an unused register, scavenge one now!
-  if (PhysReg == 0) {
-    assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
+  assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
 
-    // Loop over all of the preallocated registers from the least recently used
-    // to the most recently used.  When we find one that is capable of holding
-    // our register, use it.
-    for (unsigned i = 0; PhysReg == 0; ++i) {
-      assert(i != PhysRegsUseOrder.size() &&
-             "Couldn't find a register of the appropriate class!");
+  // Loop over all of the preallocated registers from the least recently used
+  // to the most recently used.  When we find one that is capable of holding
+  // our register, use it.
+  for (unsigned i = 0; PhysReg == 0; ++i) {
+    assert(i != PhysRegsUseOrder.size() &&
+           "Couldn't find a register of the appropriate class!");
 
-      unsigned R = PhysRegsUseOrder[i];
+    unsigned R = PhysRegsUseOrder[i];
 
-      // We can only use this register if it holds a virtual register (ie, it
-      // can be spilled).  Do not use it if it is an explicitly allocated
-      // physical register!
-      assert(PhysRegsUsed[R] != -1 &&
-             "PhysReg in PhysRegsUseOrder, but is not allocated?");
-      if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
-        // If the current register is compatible, use it.
-        if (RC->contains(R)) {
-          PhysReg = R;
-          break;
-        } else {
-          // If one of the registers aliased to the current register is
-          // compatible, use it.
-          for (const unsigned *AliasIt = TRI->getAliasSet(R);
-               *AliasIt; ++AliasIt) {
-            if (RC->contains(*AliasIt) &&
-                // If this is pinned down for some reason, don't use it.  For
-                // example, if CL is pinned, and we run across CH, don't use
-                // CH as justification for using scavenging ECX (which will
-                // fail).
-                PhysRegsUsed[*AliasIt] != 0 &&
-                
-                // Make sure the register is allocatable.  Don't allocate SIL on
-                // x86-32.
-                PhysRegsUsed[*AliasIt] != -2) {
-              PhysReg = *AliasIt;    // Take an aliased register
-              break;
-            }
-          }
-        }
+    // We can only use this register if it holds a virtual register (ie, it
+    // can be spilled).  Do not use it if it is an explicitly allocated
+    // physical register!
+    assert(PhysRegsUsed[R] != -1 &&
+           "PhysReg in PhysRegsUseOrder, but is not allocated?");
+    if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
+      // If the current register is compatible, use it.
+      if (RC->contains(R)) {
+        PhysReg = R;
+        break;
+      }
+      
+      // If one of the registers aliased to the current register is
+      // compatible, use it.
+      for (const unsigned *AliasIt = TRI->getAliasSet(R);
+           *AliasIt; ++AliasIt) {
+        if (!RC->contains(*AliasIt)) continue;
+        
+        // If this is pinned down for some reason, don't use it.  For
+        // example, if CL is pinned, and we run across CH, don't use
+        // CH as justification for using scavenging ECX (which will
+        // fail).
+        if (PhysRegsUsed[*AliasIt] == 0) continue;
+            
+        // Make sure the register is allocatable.  Don't allocate SIL on
+        // x86-32.
+        if (PhysRegsUsed[*AliasIt] == -2) continue;
+        
+        PhysReg = *AliasIt;    // Take an aliased register
+        break;
       }
     }
-
-    assert(PhysReg && "Physical register not assigned!?!?");
-
-    // At this point PhysRegsUseOrder[i] is the least recently used register of
-    // compatible register class.  Spill it to memory and reap its remains.
-    spillPhysReg(MBB, I, PhysReg);
   }
 
+  assert(PhysReg && "Physical register not assigned!?!?");
+
+  // At this point PhysRegsUseOrder[i] is the least recently used register of
+  // compatible register class.  Spill it to memory and reap its remains.
+  spillPhysReg(MBB, I, PhysReg);
+
   // Now that we know which register we need to assign this to, do it now!
   assignVirtToPhysReg(VirtReg, PhysReg);
   return PhysReg;
@@ -539,21 +549,21 @@
            << "constraints:\n";
       MI->print(Msg, TM);
     }
-    llvm_report_error(Msg.str());
+    report_fatal_error(Msg.str());
   }
   for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
        *SubRegs; ++SubRegs) {
-    if (!ReloadedRegs.insert(*SubRegs)) {
-      std::string msg;
-      raw_string_ostream Msg(msg);
-      Msg << "Ran out of registers during register allocation!";
-      if (MI->isInlineAsm()) {
-        Msg << "\nPlease check your inline asm statement for invalid "
-             << "constraints:\n";
-        MI->print(Msg, TM);
-      }
-      llvm_report_error(Msg.str());
+    if (ReloadedRegs.insert(*SubRegs)) continue;
+    
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Ran out of registers during register allocation!";
+    if (MI->isInlineAsm()) {
+      Msg << "\nPlease check your inline asm statement for invalid "
+           << "constraints:\n";
+      MI->print(Msg, TM);
     }
+    report_fatal_error(Msg.str());
   }
 
   return MI;
@@ -563,7 +573,7 @@
 /// read/mod/write register, i.e. update partial register.
 static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand& MO = MI->getOperand(i);
+    MachineOperand &MO = MI->getOperand(i);
     if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
         MO.isDef() && !MO.isDead())
       return true;
@@ -575,7 +585,7 @@
 /// read/mod/write register, i.e. update partial register.
 static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand& MO = MI->getOperand(i);
+    MachineOperand &MO = MI->getOperand(i);
     if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
         !MO.isDef() && MO.isKill())
       return true;
@@ -606,7 +616,7 @@
 /// ComputeLocalLiveness - Computes liveness of registers within a basic
 /// block, setting the killed/dead flags as appropriate.
 void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
-  MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   // Keep track of the most recently seen previous use or def of each reg, 
   // so that we can update them with dead/kill markers.
   DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
@@ -614,65 +624,69 @@
        I != E; ++I) {
     if (I->isDebugValue())
       continue;
+    
     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
-      MachineOperand& MO = I->getOperand(i);
+      MachineOperand &MO = I->getOperand(i);
       // Uses don't trigger any flags, but we need to save
       // them for later.  Also, we have to process these
       // _before_ processing the defs, since an instr
       // uses regs before it defs them.
-      if (MO.isReg() && MO.getReg() && MO.isUse()) {
-        LastUseDef[MO.getReg()] = std::make_pair(I, i);
+      if (!MO.isReg() || !MO.getReg() || !MO.isUse())
+        continue;
+      
+      LastUseDef[MO.getReg()] = std::make_pair(I, i);
+      
+      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
+      
+      const unsigned *Aliases = TRI->getAliasSet(MO.getReg());
+      if (Aliases == 0)
+        continue;
+      
+      while (*Aliases) {
+        DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+          alias = LastUseDef.find(*Aliases);
         
+        if (alias != LastUseDef.end() && alias->second.first != I)
+          LastUseDef[*Aliases] = std::make_pair(I, i);
         
-        if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
-        
-        const unsigned* Aliases = TRI->getAliasSet(MO.getReg());
-        if (Aliases) {
-          while (*Aliases) {
-            DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
-              alias = LastUseDef.find(*Aliases);
-            
-            if (alias != LastUseDef.end() && alias->second.first != I)
-              LastUseDef[*Aliases] = std::make_pair(I, i);
-            
-            ++Aliases;
-          }
-        }
+        ++Aliases;
       }
     }
     
     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
-      MachineOperand& MO = I->getOperand(i);
+      MachineOperand &MO = I->getOperand(i);
       // Defs others than 2-addr redefs _do_ trigger flag changes:
       //   - A def followed by a def is dead
       //   - A use followed by a def is a kill
-      if (MO.isReg() && MO.getReg() && MO.isDef()) {
-        DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
-          last = LastUseDef.find(MO.getReg());
-        if (last != LastUseDef.end()) {
-          // Check if this is a two address instruction.  If so, then
-          // the def does not kill the use.
-          if (last->second.first == I &&
-              I->isRegTiedToUseOperand(i))
-            continue;
-          
-          MachineOperand& lastUD =
-                      last->second.first->getOperand(last->second.second);
-          if (lastUD.isDef())
-            lastUD.setIsDead(true);
-          else
-            lastUD.setIsKill(true);
-        }
+      if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue;
+      
+      DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+        last = LastUseDef.find(MO.getReg());
+      if (last != LastUseDef.end()) {
+        // Check if this is a two address instruction.  If so, then
+        // the def does not kill the use.
+        if (last->second.first == I &&
+            I->isRegTiedToUseOperand(i))
+          continue;
         
-        LastUseDef[MO.getReg()] = std::make_pair(I, i);
+        MachineOperand &lastUD =
+                    last->second.first->getOperand(last->second.second);
+        if (lastUD.isDef())
+          lastUD.setIsDead(true);
+        else
+          lastUD.setIsKill(true);
       }
+      
+      LastUseDef[MO.getReg()] = std::make_pair(I, i);
     }
   }
   
   // Live-out (of the function) registers contain return values of the function,
   // so we need to make sure they are alive at return time.
-  if (!MBB.empty() && MBB.back().getDesc().isReturn()) {
-    MachineInstr* Ret = &MBB.back();
+  MachineBasicBlock::iterator Ret = MBB.getFirstTerminator();
+  bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn());
+
+  if (BBEndsInReturn)
     for (MachineRegisterInfo::liveout_iterator
          I = MF->getRegInfo().liveout_begin(),
          E = MF->getRegInfo().liveout_end(); I != E; ++I)
@@ -680,15 +694,14 @@
         Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
         LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
       }
-  }
   
   // Finally, loop over the final use/def of each reg 
   // in the block and determine if it is dead.
   for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
        I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
-    MachineInstr* MI = I->second.first;
+    MachineInstr *MI = I->second.first;
     unsigned idx = I->second.second;
-    MachineOperand& MO = MI->getOperand(idx);
+    MachineOperand &MO = MI->getOperand(idx);
     
     bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
     
@@ -696,7 +709,10 @@
     bool usedOutsideBlock = isPhysReg ? false :   
           UsedInMultipleBlocks.test(MO.getReg() -  
                                     TargetRegisterInfo::FirstVirtualRegister);
-    if (!isPhysReg && !usedOutsideBlock) {
+
+    // If the machine BB ends in a return instruction, then the value isn't used
+    // outside of the BB.
+    if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) {
       // DBG_VALUE complicates this:  if the only refs of a register outside
       // this block are DBG_VALUE, we can't keep the reg live just for that,
       // as it will cause the reg to be spilled at the end of this block when
@@ -704,23 +720,27 @@
       // happens.
       bool UsedByDebugValueOnly = false;
       for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
-           UE = MRI.reg_end(); UI != UE; ++UI)
+             UE = MRI.reg_end(); UI != UE; ++UI) {
         // Two cases:
         // - used in another block
         // - used in the same block before it is defined (loop)
-        if (UI->getParent() != &MBB ||
-            (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) {
-          if (UI->isDebugValue()) {
-            UsedByDebugValueOnly = true;
-            continue;
-          }
-          // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
-          UsedInMultipleBlocks.set(MO.getReg() - 
-                                   TargetRegisterInfo::FirstVirtualRegister);
-          usedOutsideBlock = true;
-          UsedByDebugValueOnly = false;
-          break;
+        if (UI->getParent() == &MBB &&
+            !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI)))
+          continue;
+        
+        if (UI->isDebugValue()) {
+          UsedByDebugValueOnly = true;
+          continue;
         }
+
+        // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
+        UsedInMultipleBlocks.set(MO.getReg() - 
+                                 TargetRegisterInfo::FirstVirtualRegister);
+        usedOutsideBlock = true;
+        UsedByDebugValueOnly = false;
+        break;
+      }
+
       if (UsedByDebugValueOnly)
         for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
              UE = MRI.reg_end(); UI != UE; ++UI)
@@ -730,15 +750,16 @@
             UI.getOperand().setReg(0U);
     }
   
-    // Physical registers and those that are not live-out of the block
-    // are killed/dead at their last use/def within this block.
-    if (isPhysReg || !usedOutsideBlock) {
+    // Physical registers and those that are not live-out of the block are
+    // killed/dead at their last use/def within this block.
+    if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) {
       if (MO.isUse()) {
         // Don't mark uses that are tied to defs as kills.
         if (!MI->isRegTiedToDefOperand(idx))
           MO.setIsKill(true);
-      } else
+      } else {
         MO.setIsDead(true);
+      }
     }
   }
 }
@@ -762,11 +783,11 @@
     AddToPhysRegsUseOrder(Reg); 
     for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
          *SubRegs; ++SubRegs) {
-      if (PhysRegsUsed[*SubRegs] != -2) {
-        AddToPhysRegsUseOrder(*SubRegs); 
-        PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
-        MF->getRegInfo().setPhysRegUsed(*SubRegs);
-      }
+      if (PhysRegsUsed[*SubRegs] == -2) continue;
+      
+      AddToPhysRegsUseOrder(*SubRegs); 
+      PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+      MF->getRegInfo().setPhysRegUsed(*SubRegs);
     }
   }
   
@@ -805,16 +826,16 @@
 
     SmallVector<unsigned, 8> Kills;
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand& MO = MI->getOperand(i);
-      if (MO.isReg() && MO.isKill()) {
-        if (!MO.isImplicit())
-          Kills.push_back(MO.getReg());
-        else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
-          // These are extra physical register kills when a sub-register
-          // is defined (def of a sub-register is a read/mod/write of the
-          // larger registers). Ignore.
-          Kills.push_back(MO.getReg());
-      }
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isKill()) continue;
+      
+      if (!MO.isImplicit())
+        Kills.push_back(MO.getReg());
+      else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+        // These are extra physical register kills when a sub-register
+        // is defined (def of a sub-register is a read/mod/write of the
+        // larger registers). Ignore.
+        Kills.push_back(MO.getReg());
     }
 
     // If any physical regs are earlyclobber, spill any value they might
@@ -822,45 +843,45 @@
     // If any virtual regs are earlyclobber, allocate them now (before
     // freeing inputs that are killed).
     if (MI->isInlineAsm()) {
-      for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
-        MachineOperand& MO = MI->getOperand(i);
-        if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() &&
-            MO.getReg()) {
-          if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-            unsigned DestVirtReg = MO.getReg();
-            unsigned DestPhysReg;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() ||
+            !MO.getReg())
+          continue;
+          
+        if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+          unsigned DestVirtReg = MO.getReg();
+          unsigned DestPhysReg;
 
-            // If DestVirtReg already has a value, use it.
-            if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
-              DestPhysReg = getReg(MBB, MI, DestVirtReg);
-            MF->getRegInfo().setPhysRegUsed(DestPhysReg);
-            markVirtRegModified(DestVirtReg);
-            getVirtRegLastUse(DestVirtReg) =
-                   std::make_pair((MachineInstr*)0, 0);
-            DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
-                         << " to %reg" << DestVirtReg << "\n");
-            MO.setReg(DestPhysReg);  // Assign the earlyclobber register
-          } else {
-            unsigned Reg = MO.getReg();
-            if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
-            // These are extra physical register defs when a sub-register
-            // is defined (def of a sub-register is a read/mod/write of the
-            // larger registers). Ignore.
-            if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+          // If DestVirtReg already has a value, use it.
+          if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+            DestPhysReg = getReg(MBB, MI, DestVirtReg);
+          MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+          markVirtRegModified(DestVirtReg);
+          getVirtRegLastUse(DestVirtReg) =
+                 std::make_pair((MachineInstr*)0, 0);
+          DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
+                       << " to %reg" << DestVirtReg << "\n");
+          MO.setReg(DestPhysReg);  // Assign the earlyclobber register
+        } else {
+          unsigned Reg = MO.getReg();
+          if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
+          // These are extra physical register defs when a sub-register
+          // is defined (def of a sub-register is a read/mod/write of the
+          // larger registers). Ignore.
+          if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
 
-            MF->getRegInfo().setPhysRegUsed(Reg);
-            spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
-            PhysRegsUsed[Reg] = 0;            // It is free and reserved now
-            AddToPhysRegsUseOrder(Reg); 
+          MF->getRegInfo().setPhysRegUsed(Reg);
+          spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+          PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+          AddToPhysRegsUseOrder(Reg); 
 
-            for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
-                 *SubRegs; ++SubRegs) {
-              if (PhysRegsUsed[*SubRegs] != -2) {
-                MF->getRegInfo().setPhysRegUsed(*SubRegs);
-                PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
-                AddToPhysRegsUseOrder(*SubRegs); 
-              }
-            }
+          for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+               *SubRegs; ++SubRegs) {
+            if (PhysRegsUsed[*SubRegs] == -2) continue;
+            MF->getRegInfo().setPhysRegUsed(*SubRegs);
+            PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+            AddToPhysRegsUseOrder(*SubRegs); 
           }
         }
       }
@@ -886,7 +907,7 @@
     //
     SmallSet<unsigned, 4> ReloadedRegs;
     for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
-      MachineOperand& MO = MI->getOperand(i);
+      MachineOperand &MO = MI->getOperand(i);
       // here we are looking for only used operands (never def&use)
       if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
           TargetRegisterInfo::isVirtualRegister(MO.getReg()))
@@ -915,18 +936,18 @@
                "Silently clearing a virtual register?");
       }
 
-      if (PhysReg) {
-        DEBUG(dbgs() << "  Last use of " << TRI->getName(PhysReg)
-                     << "[%reg" << VirtReg <<"], removing it from live set\n");
-        removePhysReg(PhysReg);
-        for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
-             *SubRegs; ++SubRegs) {
-          if (PhysRegsUsed[*SubRegs] != -2) {
-            DEBUG(dbgs()  << "  Last use of "
-                          << TRI->getName(*SubRegs) << "[%reg" << VirtReg
-                          <<"], removing it from live set\n");
-            removePhysReg(*SubRegs);
-          }
+      if (!PhysReg) continue;
+      
+      DEBUG(dbgs() << "  Last use of " << TRI->getName(PhysReg)
+                   << "[%reg" << VirtReg <<"], removing it from live set\n");
+      removePhysReg(PhysReg);
+      for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+           *SubRegs; ++SubRegs) {
+        if (PhysRegsUsed[*SubRegs] != -2) {
+          DEBUG(dbgs()  << "  Last use of "
+                        << TRI->getName(*SubRegs) << "[%reg" << VirtReg
+                        <<"], removing it from live set\n");
+          removePhysReg(*SubRegs);
         }
       }
     }
@@ -934,30 +955,31 @@
     // Loop over all of the operands of the instruction, spilling registers that
     // are defined, and marking explicit destinations in the PhysRegsUsed map.
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand& MO = MI->getOperand(i);
-      if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
-          !MO.isEarlyClobber() &&
-          TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
-        unsigned Reg = MO.getReg();
-        if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
-        // These are extra physical register defs when a sub-register
-        // is defined (def of a sub-register is a read/mod/write of the
-        // larger registers). Ignore.
-        if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() ||
+          MO.isEarlyClobber() ||
+          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+        continue;
+      
+      unsigned Reg = MO.getReg();
+      if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
+      // These are extra physical register defs when a sub-register
+      // is defined (def of a sub-register is a read/mod/write of the
+      // larger registers). Ignore.
+      if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
 
-        MF->getRegInfo().setPhysRegUsed(Reg);
-        spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
-        PhysRegsUsed[Reg] = 0;            // It is free and reserved now
-        AddToPhysRegsUseOrder(Reg); 
+      MF->getRegInfo().setPhysRegUsed(Reg);
+      spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+      PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+      AddToPhysRegsUseOrder(Reg); 
 
-        for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
-             *SubRegs; ++SubRegs) {
-          if (PhysRegsUsed[*SubRegs] != -2) {
-            MF->getRegInfo().setPhysRegUsed(*SubRegs);
-            PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
-            AddToPhysRegsUseOrder(*SubRegs); 
-          }
-        }
+      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+           *SubRegs; ++SubRegs) {
+        if (PhysRegsUsed[*SubRegs] == -2) continue;
+        
+        MF->getRegInfo().setPhysRegUsed(*SubRegs);
+        PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+        AddToPhysRegsUseOrder(*SubRegs); 
       }
     }
 
@@ -974,18 +996,18 @@
         MF->getRegInfo().setPhysRegUsed(Reg);
         for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
              *SubRegs; ++SubRegs) {
-          if (PhysRegsUsed[*SubRegs] != -2) {
-            AddToPhysRegsUseOrder(*SubRegs); 
-            PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
-            MF->getRegInfo().setPhysRegUsed(*SubRegs);
-          }
+          if (PhysRegsUsed[*SubRegs] == -2) continue;
+          
+          AddToPhysRegsUseOrder(*SubRegs); 
+          PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+          MF->getRegInfo().setPhysRegUsed(*SubRegs);
         }
       }
     }
 
     SmallVector<unsigned, 8> DeadDefs;
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand& MO = MI->getOperand(i);
+      MachineOperand &MO = MI->getOperand(i);
       if (MO.isReg() && MO.isDead())
         DeadDefs.push_back(MO.getReg());
     }
@@ -996,45 +1018,46 @@
     // we need to scavenge a register.
     //
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand& MO = MI->getOperand(i);
-      if (MO.isReg() && MO.isDef() && MO.getReg() &&
-          !MO.isEarlyClobber() &&
-          TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-        unsigned DestVirtReg = MO.getReg();
-        unsigned DestPhysReg;
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef() || !MO.getReg() ||
+          MO.isEarlyClobber() ||
+          !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        continue;
+      
+      unsigned DestVirtReg = MO.getReg();
+      unsigned DestPhysReg;
 
-        // If DestVirtReg already has a value, use it.
-        if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
-          // If this is a copy try to reuse the input as the output;
-          // that will make the copy go away.
-          // If this is a copy, the source reg is a phys reg, and
-          // that reg is available, use that phys reg for DestPhysReg.
-          // If this is a copy, the source reg is a virtual reg, and
-          // the phys reg that was assigned to that virtual reg is now
-          // available, use that phys reg for DestPhysReg.  (If it's now
-          // available that means this was the last use of the source.)
-          if (isCopy &&
-              TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
-              isPhysRegAvailable(SrcCopyReg)) {
-            DestPhysReg = SrcCopyReg;
-            assignVirtToPhysReg(DestVirtReg, DestPhysReg);
-          } else if (isCopy &&
-              TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
-              SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
-              MF->getRegInfo().getRegClass(DestVirtReg)->
-                               contains(SrcCopyPhysReg)) {
-            DestPhysReg = SrcCopyPhysReg;
-            assignVirtToPhysReg(DestVirtReg, DestPhysReg);
-          } else
-            DestPhysReg = getReg(MBB, MI, DestVirtReg);
-        }
-        MF->getRegInfo().setPhysRegUsed(DestPhysReg);
-        markVirtRegModified(DestVirtReg);
-        getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
-        DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
-                     << " to %reg" << DestVirtReg << "\n");
-        MO.setReg(DestPhysReg);  // Assign the output register
+      // If DestVirtReg already has a value, use it.
+      if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
+        // If this is a copy try to reuse the input as the output;
+        // that will make the copy go away.
+        // If this is a copy, the source reg is a phys reg, and
+        // that reg is available, use that phys reg for DestPhysReg.
+        // If this is a copy, the source reg is a virtual reg, and
+        // the phys reg that was assigned to that virtual reg is now
+        // available, use that phys reg for DestPhysReg.  (If it's now
+        // available that means this was the last use of the source.)
+        if (isCopy &&
+            TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
+            isPhysRegAvailable(SrcCopyReg)) {
+          DestPhysReg = SrcCopyReg;
+          assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+        } else if (isCopy &&
+            TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
+            SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
+            MF->getRegInfo().getRegClass(DestVirtReg)->
+                             contains(SrcCopyPhysReg)) {
+          DestPhysReg = SrcCopyPhysReg;
+          assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+        } else
+          DestPhysReg = getReg(MBB, MI, DestVirtReg);
       }
+      MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+      markVirtRegModified(DestVirtReg);
+      getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
+      DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
+                   << " to %reg" << DestVirtReg << "\n");
+      MO.setReg(DestPhysReg);  // Assign the output register
     }
 
     // If this instruction defines any registers that are immediately dead,
@@ -1051,21 +1074,20 @@
       } else if (PhysRegsUsed[PhysReg] == -2) {
         // Unallocatable register dead, ignore.
         continue;
-      }
-
-      if (PhysReg) {
-        DEBUG(dbgs()  << "  Register " << TRI->getName(PhysReg)
-                      << " [%reg" << VirtReg
-                      << "] is never used, removing it from live set\n");
-        removePhysReg(PhysReg);
-        for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
-             *AliasSet; ++AliasSet) {
-          if (PhysRegsUsed[*AliasSet] != -2) {
-            DEBUG(dbgs()  << "  Register " << TRI->getName(*AliasSet)
-                          << " [%reg" << *AliasSet
-                          << "] is never used, removing it from live set\n");
-            removePhysReg(*AliasSet);
-          }
+      } else if (!PhysReg)
+        continue;
+      
+      DEBUG(dbgs()  << "  Register " << TRI->getName(PhysReg)
+                    << " [%reg" << VirtReg
+                    << "] is never used, removing it from live set\n");
+      removePhysReg(PhysReg);
+      for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+           *AliasSet; ++AliasSet) {
+        if (PhysRegsUsed[*AliasSet] != -2) {
+          DEBUG(dbgs()  << "  Register " << TRI->getName(*AliasSet)
+                        << " [%reg" << *AliasSet
+                        << "] is never used, removing it from live set\n");
+          removePhysReg(*AliasSet);
         }
       }
     }
@@ -1135,8 +1157,10 @@
   StackSlotForVirtReg.grow(LastVirtReg);
   Virt2PhysRegMap.grow(LastVirtReg);
   Virt2LastUseMap.grow(LastVirtReg);
-  VirtRegModified.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
-  UsedInMultipleBlocks.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+  VirtRegModified.resize(LastVirtReg+1 -
+                         TargetRegisterInfo::FirstVirtualRegister);
+  UsedInMultipleBlocks.resize(LastVirtReg+1 -
+                              TargetRegisterInfo::FirstVirtualRegister);
  
   // Loop over all of the basic blocks, eliminating virtual register references
   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 67bf209..179984f 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -64,7 +64,7 @@
     return;
 
   // Live-in registers are in use.
-  for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+  for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
          E = MBB->livein_end(); I != E; ++I)
     setUsed(*I);
 
@@ -136,6 +136,9 @@
     ScavengeRestore = NULL;
   }
 
+  if (MI->isDebugValue())
+    return;
+
   // Find out which registers are early clobbered, killed, defined, and marked
   // def-dead in this instruction.
   BitVector EarlyClobberRegs(NumPhysRegs);
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1f3e295..587f001 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -29,7 +29,6 @@
     TRI(TM.getRegisterInfo()),
     TLI(TM.getTargetLowering()),
     MF(mf), MRI(mf.getRegInfo()),
-    ConstPool(MF.getConstantPool()),
     EntrySU(), ExitSU() {
 }
 
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index badf34e..e8821ae 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -34,6 +34,7 @@
                                      const MachineDominatorTree &mdt)
   : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {
   MFI = mf.getFrameInfo();
+  DbgValueVec.clear();
 }
 
 /// Run - perform scheduling.
@@ -157,6 +158,10 @@
   std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
   std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
 
+  // Keep track of dangling debug references to registers.
+  std::pair<MachineInstr*, unsigned>
+        DanglingDebugValue[TargetRegisterInfo::FirstVirtualRegister];
+
   // Check to see if the scheduler cares about latencies.
   bool UnitLatencies = ForceUnitLatencies();
 
@@ -164,10 +169,25 @@
   const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
   unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
 
+  // Remove any stale debug info; sometimes BuildSchedGraph is called again
+  // without emitting the info from the previous call.
+  DbgValueVec.clear();
+  std::memset(DanglingDebugValue, 0, sizeof(DanglingDebugValue));
+
   // Walk the list of instructions, from bottom moving up.
   for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
        MII != MIE; --MII) {
     MachineInstr *MI = prior(MII);
+    // DBG_VALUE does not have SUnit's built, so just remember these for later
+    // reinsertion.
+    if (MI->isDebugValue()) {
+      if (MI->getNumOperands()==3 && MI->getOperand(0).isReg() &&
+          MI->getOperand(0).getReg())
+        DanglingDebugValue[MI->getOperand(0).getReg()] =
+             std::make_pair(MI, DbgValueVec.size());
+      DbgValueVec.push_back(MI);
+      continue;
+    }
     const TargetInstrDesc &TID = MI->getDesc();
     assert(!TID.isTerminator() && !MI->isLabel() &&
            "Cannot schedule terminators or labels!");
@@ -188,6 +208,13 @@
       if (Reg == 0) continue;
 
       assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+
+      if (MO.isDef() && DanglingDebugValue[Reg].first!=0) {
+        SU->setDbgInstr(DanglingDebugValue[Reg].first);
+        DbgValueVec[DanglingDebugValue[Reg].second] = 0;
+        DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0);
+      }
+
       std::vector<SUnit *> &UseList = Uses[Reg];
       std::vector<SUnit *> &DefList = Defs[Reg];
       // Optionally add output and anti dependencies. For anti
@@ -221,48 +248,47 @@
         unsigned DataLatency = SU->Latency;
         for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
           SUnit *UseSU = UseList[i];
-          if (UseSU != SU) {
-            unsigned LDataLatency = DataLatency;
-            // Optionally add in a special extra latency for nodes that
-            // feed addresses.
-            // TODO: Do this for register aliases too.
-            // TODO: Perhaps we should get rid of
-            // SpecialAddressLatency and just move this into
-            // adjustSchedDependency for the targets that care about
-            // it.
-            if (SpecialAddressLatency != 0 && !UnitLatencies) {
-              MachineInstr *UseMI = UseSU->getInstr();
-              const TargetInstrDesc &UseTID = UseMI->getDesc();
-              int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
-              assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
-              if ((UseTID.mayLoad() || UseTID.mayStore()) &&
-                  (unsigned)RegUseIndex < UseTID.getNumOperands() &&
-                  UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
-                LDataLatency += SpecialAddressLatency;
-            }
-            // Adjust the dependence latency using operand def/use
-            // information (if any), and then allow the target to
-            // perform its own adjustments.
-            const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
-            if (!UnitLatencies) {
-              ComputeOperandLatency(SU, UseSU, (SDep &)dep);
-              ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
-            }
-            UseSU->addPred(dep);
+          if (UseSU == SU)
+            continue;
+          unsigned LDataLatency = DataLatency;
+          // Optionally add in a special extra latency for nodes that
+          // feed addresses.
+          // TODO: Do this for register aliases too.
+          // TODO: Perhaps we should get rid of
+          // SpecialAddressLatency and just move this into
+          // adjustSchedDependency for the targets that care about it.
+          if (SpecialAddressLatency != 0 && !UnitLatencies) {
+            MachineInstr *UseMI = UseSU->getInstr();
+            const TargetInstrDesc &UseTID = UseMI->getDesc();
+            int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+            assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
+            if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+                (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+                UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+              LDataLatency += SpecialAddressLatency;
           }
+          // Adjust the dependence latency using operand def/use
+          // information (if any), and then allow the target to
+          // perform its own adjustments.
+          const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
+          if (!UnitLatencies) {
+            ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+            ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+          }
+          UseSU->addPred(dep);
         }
         for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
           std::vector<SUnit *> &UseList = Uses[*Alias];
           for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
             SUnit *UseSU = UseList[i];
-            if (UseSU != SU) {
-              const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
-              if (!UnitLatencies) {
-                ComputeOperandLatency(SU, UseSU, (SDep &)dep);
-                ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
-              }
-              UseSU->addPred(dep);
+            if (UseSU == SU)
+              continue;
+            const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
+            if (!UnitLatencies) {
+              ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+              ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
             }
+            UseSU->addPred(dep);
           }
         }
 
@@ -501,7 +527,8 @@
   MachineInstr *DefMI = Def->getInstr();
   int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
   if (DefIdx != -1) {
-    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx);
+    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
+                                              DefIdx);
     if (DefCycle >= 0) {
       MachineInstr *UseMI = Use->getInstr();
       const unsigned UseClass = UseMI->getDesc().getSchedClass();
@@ -555,6 +582,14 @@
     BB->remove(I);
   }
 
+  // First reinsert any remaining debug_values; these are either constants,
+  // or refer to live-in registers.  The beginning of the block is the right
+  // place for the latter.  The former might reasonably be placed elsewhere
+  // using some kind of ordering algorithm, but right now it doesn't matter.
+  for (int i = DbgValueVec.size()-1; i>=0; --i)
+    if (DbgValueVec[i])
+      BB->insert(InsertPos, DbgValueVec[i]);
+
   // Then re-insert them according to the given schedule.
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
     SUnit *SU = Sequence[i];
@@ -565,12 +600,21 @@
     }
 
     BB->insert(InsertPos, SU->getInstr());
+    if (SU->getDbgInstr())
+      BB->insert(InsertPos, SU->getDbgInstr());
   }
 
   // Update the Begin iterator, as the first instruction in the block
   // may have been scheduled later.
-  if (!Sequence.empty())
+  if (!DbgValueVec.empty()) {
+    for (int i = DbgValueVec.size()-1; i>=0; --i)
+      if (DbgValueVec[i]!=0) {
+        Begin = DbgValueVec[DbgValueVec.size()-1];
+        break;
+      }
+  } else if (!Sequence.empty())
     Begin = Sequence[0]->getInstr();
 
+  DbgValueVec.clear();
   return BB;
 }
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index 366c3a8..c9b44de 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -106,6 +106,10 @@
     /// initialized and destructed for each block.
     std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister];
     std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister];
+ 
+    /// DbgValueVec - Remember DBG_VALUEs that refer to a particular
+    /// register.
+    std::vector<MachineInstr *>DbgValueVec;
 
     /// PendingLoads - Remember where unknown loads are after the most recent
     /// unknown store, as we iterate. As with Defs and Uses, this is here
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 80c7d7c..0cfd5e1 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -20,6 +20,7 @@
   SelectionDAGISel.cpp
   SelectionDAGPrinter.cpp
   TargetLowering.cpp
+  TargetSelectionDAGInfo.cpp
   )
 
 target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3be6b43..9192593 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -129,6 +129,14 @@
     bool CombineToPreIndexedLoadStore(SDNode *N);
     bool CombineToPostIndexedLoadStore(SDNode *N);
 
+    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
+    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
+    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
+    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
+    SDValue PromoteIntBinOp(SDValue Op);
+    SDValue PromoteIntShiftOp(SDValue Op);
+    SDValue PromoteExtend(SDValue Op);
+    bool PromoteLoad(SDValue Op);
 
     /// combine - call the node-specific routine that knows how to fold each
     /// particular type of node. If that doesn't do anything, try the
@@ -254,24 +262,28 @@
     /// looking for a better chain (aliasing node.)
     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 
-    /// getShiftAmountTy - Returns a type large enough to hold any valid
-    /// shift amount - before type legalization these can be huge.
-    EVT getShiftAmountTy() {
-      return LegalTypes ?  TLI.getShiftAmountTy() : TLI.getPointerTy();
-    }
-
-public:
+  public:
     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
-      : DAG(D),
-        TLI(D.getTargetLoweringInfo()),
-        Level(Unrestricted),
-        OptLevel(OL),
-        LegalOperations(false),
-        LegalTypes(false),
-        AA(A) {}
+      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+        OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
 
     /// Run - runs the dag combiner on all nodes in the work list
     void Run(CombineLevel AtLevel);
+    
+    SelectionDAG &getDAG() const { return DAG; }
+    
+    /// getShiftAmountTy - Returns a type large enough to hold any valid
+    /// shift amount - before type legalization these can be huge.
+    EVT getShiftAmountTy() {
+      return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
+    }
+    
+    /// isTypeLegal - This method returns true if we are running before type
+    /// legalization or if the specified VT is legal.
+    bool isTypeLegal(const EVT &VT) {
+      if (!LegalTypes) return true;
+      return TLI.isTypeLegal(VT);
+    }
   };
 }
 
@@ -577,9 +589,8 @@
   return SDValue(N, 0);
 }
 
-void
-DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
-                                                                          TLO) {
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
   // Replace all uses.  If any nodes become isomorphic to other nodes and
   // are deleted, make sure to remove them from our worklist.
   WorkListRemover DeadNodes(*this);
@@ -609,7 +620,7 @@
 /// it can be simplified or if things it uses can be simplified by bit
 /// propagation.  If so, return true.
 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
-  TargetLowering::TargetLoweringOpt TLO(DAG);
+  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
   APInt KnownZero, KnownOne;
   if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
     return false;
@@ -629,6 +640,260 @@
   return true;
 }
 
+void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
+  DebugLoc dl = Load->getDebugLoc();
+  EVT VT = Load->getValueType(0);
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+
+  DEBUG(dbgs() << "\nReplacing.9 ";
+        Load->dump(&DAG);
+        dbgs() << "\nWith: ";
+        Trunc.getNode()->dump(&DAG);
+        dbgs() << '\n');
+  WorkListRemover DeadNodes(*this);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1),
+                                &DeadNodes);
+  removeFromWorkList(Load);
+  DAG.DeleteNode(Load);
+}
+
+SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
+  Replace = false;
+  DebugLoc dl = Op.getDebugLoc();
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+    ISD::LoadExtType ExtType =
+      ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD : LD->getExtensionType();
+    Replace = true;
+    return DAG.getExtLoad(ExtType, dl, PVT,
+                          LD->getChain(), LD->getBasePtr(),
+                          LD->getSrcValue(), LD->getSrcValueOffset(),
+                          LD->getMemoryVT(), LD->isVolatile(),
+                          LD->isNonTemporal(), LD->getAlignment());
+  }
+
+  unsigned Opc = Op.getOpcode();
+  switch (Opc) {
+  default: break;
+  case ISD::AssertSext:
+    return DAG.getNode(ISD::AssertSext, dl, PVT,
+                       SExtPromoteOperand(Op.getOperand(0), PVT),
+                       Op.getOperand(1));
+  case ISD::AssertZext:
+    return DAG.getNode(ISD::AssertZext, dl, PVT,
+                       ZExtPromoteOperand(Op.getOperand(0), PVT),
+                       Op.getOperand(1));
+  case ISD::Constant: {
+    unsigned ExtOpc =
+      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    return DAG.getNode(ExtOpc, dl, PVT, Op);
+  }    
+  }
+
+  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
+    return SDValue();
+  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+}
+
+SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
+  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
+    return SDValue();
+  EVT OldVT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  bool Replace = false;
+  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+  if (NewOp.getNode() == 0)
+    return SDValue();
+
+  if (Replace)
+    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+                     DAG.getValueType(OldVT));
+}
+
+SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
+  EVT OldVT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  bool Replace = false;
+  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+  if (NewOp.getNode() == 0)
+    return SDValue();
+
+  if (Replace)
+    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+}
+
+/// PromoteIntBinOp - Promote the specified integer binary operation if the
+/// target indicates it is beneficial. e.g. On x86, it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+  if (VT.isVector() || !VT.isInteger())
+    return SDValue();
+
+  // If operation type is 'undesirable', e.g. i16 on x86, consider
+  // promoting it.
+  unsigned Opc = Op.getOpcode();
+  if (TLI.isTypeDesirableForOp(Opc, VT))
+    return SDValue();
+
+  EVT PVT = VT;
+  // Consult target whether it is a good idea to promote this operation and
+  // what's the right type to promote it to.
+  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+    assert(PVT != VT && "Don't know what type to promote to!");
+
+    bool Replace0 = false;
+    SDValue N0 = Op.getOperand(0);
+    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
+    if (NN0.getNode() == 0)
+      return SDValue();
+
+    bool Replace1 = false;
+    SDValue N1 = Op.getOperand(1);
+    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
+    if (NN1.getNode() == 0)
+      return SDValue();
+
+    AddToWorkList(NN0.getNode());
+    AddToWorkList(NN1.getNode());
+
+    if (Replace0)
+      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
+    if (Replace1)
+      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+
+    DebugLoc dl = Op.getDebugLoc();
+    return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
+  }
+  return SDValue();
+}
+
+/// PromoteIntShiftOp - Promote the specified integer shift operation if the
+/// target indicates it is beneficial. e.g. On x86, it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+  if (VT.isVector() || !VT.isInteger())
+    return SDValue();
+
+  // If operation type is 'undesirable', e.g. i16 on x86, consider
+  // promoting it.
+  unsigned Opc = Op.getOpcode();
+  if (TLI.isTypeDesirableForOp(Opc, VT))
+    return SDValue();
+
+  EVT PVT = VT;
+  // Consult target whether it is a good idea to promote this operation and
+  // what's the right type to promote it to.
+  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+    assert(PVT != VT && "Don't know what type to promote to!");
+
+    bool Replace = false;
+    SDValue N0 = Op.getOperand(0);
+    if (Opc == ISD::SRA)
+      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+    else if (Opc == ISD::SRL)
+      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+    else
+      N0 = PromoteOperand(N0, PVT, Replace);
+    if (N0.getNode() == 0)
+      return SDValue();
+
+    AddToWorkList(N0.getNode());
+    if (Replace)
+      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
+
+    DebugLoc dl = Op.getDebugLoc();
+    return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::PromoteExtend(SDValue Op) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+  if (VT.isVector() || !VT.isInteger())
+    return SDValue();
+
+  // If operation type is 'undesirable', e.g. i16 on x86, consider
+  // promoting it.
+  unsigned Opc = Op.getOpcode();
+  if (TLI.isTypeDesirableForOp(Opc, VT))
+    return SDValue();
+
+  EVT PVT = VT;
+  // Consult target whether it is a good idea to promote this operation and
+  // what's the right type to promote it to.
+  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+    assert(PVT != VT && "Don't know what type to promote to!");
+    // fold (aext (aext x)) -> (aext x)
+    // fold (aext (zext x)) -> (zext x)
+    // fold (aext (sext x)) -> (sext x)
+    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
+  }
+  return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+  if (!LegalOperations)
+    return false;
+
+  EVT VT = Op.getValueType();
+  if (VT.isVector() || !VT.isInteger())
+    return false;
+
+  // If operation type is 'undesirable', e.g. i16 on x86, consider
+  // promoting it.
+  unsigned Opc = Op.getOpcode();
+  if (TLI.isTypeDesirableForOp(Opc, VT))
+    return false;
+
+  EVT PVT = VT;
+  // Consult target whether it is a good idea to promote this operation and
+  // what's the right type to promote it to.
+  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+    assert(PVT != VT && "Don't know what type to promote to!");
+
+    DebugLoc dl = Op.getDebugLoc();
+    SDNode *N = Op.getNode();
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    ISD::LoadExtType ExtType =
+      ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD : LD->getExtensionType();
+    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+                                   LD->getChain(), LD->getBasePtr(),
+                                   LD->getSrcValue(), LD->getSrcValueOffset(),
+                                   LD->getMemoryVT(), LD->isVolatile(),
+                                   LD->isNonTemporal(), LD->getAlignment());
+    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+
+    DEBUG(dbgs() << "\nPromoting ";
+          N->dump(&DAG);
+          dbgs() << "\nTo: ";
+          Result.getNode()->dump(&DAG);
+          dbgs() << '\n');
+    WorkListRemover DeadNodes(*this);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
+    removeFromWorkList(N);
+    DAG.DeleteNode(N);
+    return true;
+  }
+  return false;
+}
+
+
 //===----------------------------------------------------------------------===//
 //  Main DAG Combiner implementation
 //===----------------------------------------------------------------------===//
@@ -1108,7 +1373,7 @@
                                        N0.getOperand(0).getOperand(1),
                                        N0.getOperand(1)));
 
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
 }
 
 SDValue DAGCombiner::visitADDC(SDNode *N) {
@@ -1246,7 +1511,7 @@
                                  VT);
     }
 
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
 }
 
 SDValue DAGCombiner::visitMUL(SDNode *N) {
@@ -1339,7 +1604,7 @@
   if (RMUL.getNode() != 0)
     return RMUL;
 
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
 }
 
 SDValue DAGCombiner::visitSDIV(SDNode *N) {
@@ -1720,8 +1985,10 @@
   // into a vsetcc.
   EVT Op0VT = N0.getOperand(0).getValueType();
   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
-       N0.getOpcode() == ISD::ANY_EXTEND  ||
        N0.getOpcode() == ISD::SIGN_EXTEND ||
+       // Avoid infinite looping with PromoteIntBinOp.
+       (N0.getOpcode() == ISD::ANY_EXTEND &&
+        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
        (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
       !VT.isVector() &&
       Op0VT == N1.getOperand(0).getValueType() &&
@@ -1983,7 +2250,7 @@
     }
   }
 
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
 }
 
 SDValue DAGCombiner::visitOR(SDNode *N) {
@@ -2109,7 +2376,7 @@
   if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
     return SDValue(Rot, 0);
 
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
 }
 
 /// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
@@ -2418,7 +2685,7 @@
       SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
-  return SDValue();
+  return PromoteIntBinOp(SDValue(N, 0));
 }
 
 /// visitShiftByConstant - Handle transforms common to the three shifts, when
@@ -2579,7 +2846,13 @@
                        HiBitsMask);
   }
 
-  return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+  if (N1C) {
+    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+    if (NewSHL.getNode())
+      return NewSHL;
+  }
+
+  return PromoteIntShiftOp(SDValue(N, 0));
 }
 
 SDValue DAGCombiner::visitSRA(SDNode *N) {
@@ -2693,7 +2966,13 @@
   if (DAG.SignBitIsZero(N0))
     return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
 
-  return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+  if (N1C) {
+    SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+    if (NewSRA.getNode())
+      return NewSRA;
+  }
+
+  return PromoteIntShiftOp(SDValue(N, 0));
 }
 
 SDValue DAGCombiner::visitSRL(SDNode *N) {
@@ -2731,6 +3010,15 @@
     return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                        DAG.getConstant(c1 + c2, N1.getValueType()));
   }
+  
+  // fold (srl (shl x, c), c) -> (and x, cst2)
+  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+      N0.getValueSizeInBits() <= 64) {
+    uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getConstant(~0ULL >> ShAmt, VT));
+  }
+  
 
   // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -2739,10 +3027,12 @@
     if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
       return DAG.getUNDEF(VT);
 
-    SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
-                                     N0.getOperand(0), N1);
-    AddToWorkList(SmallShift.getNode());
-    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+      SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+                                       N0.getOperand(0), N1);
+      AddToWorkList(SmallShift.getNode());
+      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+    }
   }
 
   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
@@ -2848,7 +3138,7 @@
     }
   }
 
-  return SDValue();
+  return PromoteIntShiftOp(SDValue(N, 0));
 }
 
 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
@@ -3221,8 +3511,9 @@
     }
     
     // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
     SDValue NegOne =
-      DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
     SDValue SCC =
       SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                        NegOne, DAG.getConstant(0, VT),
@@ -3245,7 +3536,7 @@
       DAG.SignBitIsZero(N0))
     return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
 
-  return SDValue();
+  return PromoteExtend(SDValue(N, 0));
 }
 
 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
@@ -3408,7 +3699,7 @@
                                    N0.getOperand(1)));
   }
 
-  return SDValue();
+  return PromoteExtend(SDValue(N, 0));
 }
 
 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
@@ -3544,7 +3835,7 @@
       return SCC;
   }
 
-  return SDValue();
+  return PromoteExtend(SDValue(N, 0));
 }
 
 /// GetDemandedBits - See if the specified operand can be simplified with the
@@ -3624,7 +3915,7 @@
   // Do not generate loads of non-round integer types since these can
   // be expensive (and would be wrong if the type is not byte sized).
   if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
-      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
       // Do not change the width of a volatile load.
       !cast<LoadSDNode>(N0)->isVolatile()) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -3694,7 +3985,8 @@
   // if x is small enough.
   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
     SDValue N00 = N0.getOperand(0);
-    if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
+    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
       return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
   }
 
@@ -3779,7 +4071,8 @@
   if (N0.getOpcode() == ISD::TRUNCATE)
     return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
-  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND) {
     if (N0.getOperand(0).getValueType().bitsLT(VT))
       // if the source is smaller than the dest, we still need an extend
@@ -3805,7 +4098,9 @@
 
   // fold (truncate (load x)) -> (smaller load x)
   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
-  return ReduceLoadWidth(N);
+  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
+    return ReduceLoadWidth(N);
+  return SDValue();
 }
 
 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
@@ -3949,7 +4244,7 @@
       VT.isInteger() && !VT.isVector()) {
     unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
-    if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
+    if (isTypeLegal(IntXVT)) {
       SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
                               IntXVT, N0.getOperand(1));
       AddToWorkList(X.getNode());
@@ -4075,8 +4370,8 @@
         if (Op.getOpcode() == ISD::UNDEF) continue;
         EltIsUndef = false;
 
-        NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
-                    zextOrTrunc(SrcBitSize).zext(DstBitSize));
+        NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
       }
 
       if (EltIsUndef)
@@ -4464,7 +4759,7 @@
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
 
   // fold (fp_round_inreg c1fp) -> c1fp
-  if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) {
+  if (N0CFP && isTypeLegal(EVT)) {
     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
     return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
   }
@@ -4605,7 +4900,7 @@
 
   SDNode *Trunc = 0;
   if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
-    // Look pass truncate.
+    // Look past truncate.
     Trunc = N1.getNode();
     N1 = N1.getOperand(0);
   }
@@ -4676,7 +4971,7 @@
     if (Op0.getOpcode() == Op1.getOpcode()) {
       // Avoid missing important xor optimizations.
       SDValue Tmp = visitXOR(TheXor);
-      if (Tmp.getNode()) {
+      if (Tmp.getNode() && Tmp.getNode() != TheXor) {
         DEBUG(dbgs() << "\nReplacing.8 ";
               TheXor->dump(&DAG);
               dbgs() << "\nWith: ";
@@ -4700,7 +4995,9 @@
           Equal = true;
         }
 
-      EVT SetCCVT = N1.getValueType();
+      SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;
+      
+      EVT SetCCVT = NodeToReplace.getValueType();
       if (LegalTypes)
         SetCCVT = TLI.getSetCCResultType(SetCCVT);
       SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
@@ -4709,9 +5006,9 @@
                                    Equal ? ISD::SETEQ : ISD::SETNE);
       // Replace the uses of XOR with SETCC
       WorkListRemover DeadNodes(*this);
-      DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
-      removeFromWorkList(N1.getNode());
-      DAG.DeleteNode(N1.getNode());
+      DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
+      removeFromWorkList(NodeToReplace.getNode());
+      DAG.DeleteNode(NodeToReplace.getNode());
       return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                          MVT::Other, Chain, SetCC, N2);
     }
@@ -5020,18 +5317,6 @@
   SDValue Chain = LD->getChain();
   SDValue Ptr   = LD->getBasePtr();
 
-  // Try to infer better alignment information than the load already has.
-  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
-    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
-      if (Align > LD->getAlignment())
-        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
-                              LD->getValueType(0),
-                              Chain, Ptr, LD->getSrcValue(),
-                              LD->getSrcValueOffset(), LD->getMemoryVT(),
-                              LD->isVolatile(), LD->isNonTemporal(), Align);
-    }
-  }
-
   // If load is not volatile and there are no uses of the loaded value (and
   // the updated indexed value in case of indexed loads), change uses of the
   // chain value into uses of the chain input (i.e. delete the dead load).
@@ -5097,6 +5382,18 @@
     }
   }
 
+  // Try to infer better alignment information than the load already has.
+  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > LD->getAlignment())
+        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+                              LD->getValueType(0),
+                              Chain, Ptr, LD->getSrcValue(),
+                              LD->getSrcValueOffset(), LD->getMemoryVT(),
+                              LD->isVolatile(), LD->isNonTemporal(), Align);
+    }
+  }
+
   if (CombinerAA) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5140,9 +5437,141 @@
   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
     return SDValue(N, 0);
 
+  if (PromoteLoad(SDValue(N, 0)))
+    return SDValue(N, 0);
   return SDValue();
 }
 
+/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
+/// load is having specific bytes cleared out.  If so, return the byte size
+/// being masked out and the shift amount.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+  std::pair<unsigned, unsigned> Result(0, 0);
+  
+  // Check for the structure we're looking for.
+  if (V->getOpcode() != ISD::AND ||
+      !isa<ConstantSDNode>(V->getOperand(1)) ||
+      !ISD::isNormalLoad(V->getOperand(0).getNode()))
+    return Result;
+  
+  // Check the chain and pointer.
+  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
+  
+  // The store should be chained directly to the load or be an operand of a
+  // tokenfactor.
+  if (LD == Chain.getNode())
+    ; // ok.
+  else if (Chain->getOpcode() != ISD::TokenFactor)
+    return Result; // Fail.
+  else {
+    bool isOk = false;
+    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
+      if (Chain->getOperand(i).getNode() == LD) {
+        isOk = true;
+        break;
+      }
+    if (!isOk) return Result;
+  }
+  
+  // This only handles simple types.
+  if (V.getValueType() != MVT::i16 &&
+      V.getValueType() != MVT::i32 &&
+      V.getValueType() != MVT::i64)
+    return Result;
+
+  // Check the constant mask.  Invert it so that the bits being masked out are
+  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
+  // follow the sign bit for uniformity.
+  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
+  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
+  if (NotMaskLZ == 64) return Result;  // All zero mask.
+  
+  // See if we have a continuous run of bits.  If so, we have 0*1+0*
+  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+    return Result;
+
+  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+  if (V.getValueType() != MVT::i64 && NotMaskLZ)
+    NotMaskLZ -= 64-V.getValueSizeInBits();
+  
+  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+  switch (MaskedBytes) {
+  case 1: 
+  case 2: 
+  case 4: break;
+  default: return Result; // All one mask, or 5-byte mask.
+  }
+  
+  // Verify that the first bit starts at a multiple of mask so that the access
+  // is aligned the same as the access width.
+  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+  
+  Result.first = MaskedBytes;
+  Result.second = NotMaskTZ/8;
+  return Result;
+}
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
+/// provides a value as specified by MaskInfo.  If so, replace the specified
+/// store with a narrower store of truncated IVal.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+                                SDValue IVal, StoreSDNode *St,
+                                DAGCombiner *DC) {
+  unsigned NumBytes = MaskInfo.first;
+  unsigned ByteShift = MaskInfo.second;
+  SelectionDAG &DAG = DC->getDAG();
+  
+  // Check to see if IVal is all zeros in the part being masked in by the 'or'
+  // that uses this.  If not, this is not a replacement.
+  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+                                  ByteShift*8, (ByteShift+NumBytes)*8);
+  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+  
+  // Check that it is legal on the target to do this.  It is legal if the new
+  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+  // legalization.
+  MVT VT = MVT::getIntegerVT(NumBytes*8);
+  if (!DC->isTypeLegal(VT))
+    return 0;
+  
+  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
+  // shifted by ByteShift and truncated down to NumBytes.
+  if (ByteShift)
+    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+                       DAG.getConstant(ByteShift*8, DC->getShiftAmountTy()));
+
+  // Figure out the offset for the store and the alignment of the access.
+  unsigned StOffset;
+  unsigned NewAlign = St->getAlignment();
+
+  if (DAG.getTargetLoweringInfo().isLittleEndian())
+    StOffset = ByteShift;
+  else
+    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+  
+  SDValue Ptr = St->getBasePtr();
+  if (StOffset) {
+    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+    NewAlign = MinAlign(NewAlign, StOffset);
+  }
+  
+  // Truncate down to the new size.
+  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+  
+  ++OpsNarrowed;
+  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, 
+                      St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+                      false, false, NewAlign).getNode();
+}
+
 
 /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
 /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
@@ -5162,6 +5591,28 @@
     return SDValue();
 
   unsigned Opc = Value.getOpcode();
+  
+  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+  // is a byte mask indicating a consecutive number of bytes, check to see if
+  // Y is known to provide just those bytes.  If so, we try to replace the
+  // load + replace + store sequence with a single (narrower) store, which makes
+  // the load dead.
+  if (Opc == ISD::OR) {
+    std::pair<unsigned, unsigned> MaskedLoad;
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                  Value.getOperand(1), ST,this))
+        return SDValue(NewST, 0);
+                                           
+    // Or is commutative, so try swapping X and Y.
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                  Value.getOperand(0), ST,this))
+        return SDValue(NewST, 0);
+  }
+  
   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
       Value.getOperand(1).getOpcode() != ISD::Constant)
     return SDValue();
@@ -5209,8 +5660,8 @@
         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
 
       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
-      if (NewAlign <
-          TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
+      const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+      if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
         return SDValue();
 
       SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
@@ -5248,17 +5699,6 @@
   SDValue Value = ST->getValue();
   SDValue Ptr   = ST->getBasePtr();
 
-  // Try to infer better alignment information than the store already has.
-  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
-    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
-      if (Align > ST->getAlignment())
-        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
-                                 Ptr, ST->getSrcValue(),
-                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
-                                 ST->isVolatile(), ST->isNonTemporal(), Align);
-    }
-  }
-
   // If this is a store of a bit convert, store the input value if the
   // resultant store does not need a higher alignment than the original.
   if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
@@ -5291,8 +5731,7 @@
       case MVT::ppcf128:
         break;
       case MVT::f32:
-        if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations &&
-             !ST->isVolatile()) ||
+        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
             TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
           Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                               bitcastToAPInt().getZExtValue(), MVT::i32);
@@ -5303,7 +5742,7 @@
         }
         break;
       case MVT::f64:
-        if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations &&
+        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
              !ST->isVolatile()) ||
             TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
           Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -5349,6 +5788,17 @@
     }
   }
 
+  // Try to infer better alignment information than the store already has.
+  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > ST->getAlignment())
+        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+                                 Ptr, ST->getSrcValue(),
+                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
+                                 ST->isVolatile(), ST->isNonTemporal(), Align);
+    }
+  }
+
   if (CombinerAA) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5409,7 +5859,7 @@
     if (SimplifyDemandedBits(Value,
                              APInt::getLowBitsSet(
                                Value.getValueType().getScalarType().getSizeInBits(),
-                               ST->getMemoryVT().getSizeInBits())))
+                               ST->getMemoryVT().getScalarType().getSizeInBits())))
       return SDValue(N, 0);
   }
 
@@ -5549,7 +5999,7 @@
         InVec = InVec.getOperand(0);
       if (ISD::isNormalLoad(InVec.getNode())) {
         LN0 = cast<LoadSDNode>(InVec);
-        Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems;
+        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
       }
     }
 
@@ -5657,7 +6107,7 @@
     }
 
     // Add count and size info.
-    if (!TLI.isTypeLegal(VT) && LegalTypes)
+    if (!isTypeLegal(VT))
       return SDValue();
 
     // Return the new VECTOR_SHUFFLE node.
@@ -6285,7 +6735,7 @@
 /// FindBaseOffset - Return true if base is a frame index, which is known not
 // to alias with anything but itself.  Provides base object and offset as results.
 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
-                           GlobalValue *&GV, void *&CV) {
+                           const GlobalValue *&GV, void *&CV) {
   // Assume it is a primitive operation.
   Base = Ptr; Offset = 0; GV = 0; CV = 0;
 
@@ -6333,7 +6783,7 @@
   // Gather base node and offset information.
   SDValue Base1, Base2;
   int64_t Offset1, Offset2;
-  GlobalValue *GV1, *GV2;
+  const GlobalValue *GV1, *GV2;
   void *CV1, *CV2;
   bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
   bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 1d76c7c..8aa37a0 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1,4 +1,4 @@
-///===-- FastISel.cpp - Implementation of the FastISel class --------------===//
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -47,17 +47,16 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "SelectionDAGBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "FunctionLoweringInfo.h"
 using namespace llvm;
 
-unsigned FastISel::getRegForValue(Value *V) {
+unsigned FastISel::getRegForValue(const Value *V) {
   EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
   // Don't handle non-simple values in FastISel.
   if (!RealVT.isSimple())
@@ -85,7 +84,7 @@
   if (Reg != 0)
     return Reg;
 
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     if (CI->getValue().getActiveBits() <= 64)
       Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
   } else if (isa<AllocaInst>(V)) {
@@ -95,10 +94,12 @@
     // local-CSE'd with actual integer zeros.
     Reg =
       getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
-  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+  } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+    // Try to emit the constant directly.
     Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
 
     if (!Reg) {
+      // Try to emit the constant by using an integer constant with a cast.
       const APFloat &Flt = CF->getValueAPF();
       EVT IntVT = TLI.getPointerTy();
 
@@ -116,9 +117,9 @@
           Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
       }
     }
-  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
-    if (!SelectOperator(CE, CE->getOpcode())) return 0;
-    Reg = LocalValueMap[CE];
+  } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+    if (!SelectOperator(Op, Op->getOpcode())) return 0;
+    Reg = LocalValueMap[Op];
   } else if (isa<UndefValue>(V)) {
     Reg = createResultReg(TLI.getRegClassFor(VT));
     BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
@@ -136,7 +137,7 @@
   return Reg;
 }
 
-unsigned FastISel::lookUpRegForValue(Value *V) {
+unsigned FastISel::lookUpRegForValue(const Value *V) {
   // Look up the value to see if we already have a register for it. We
   // cache values defined by Instructions across blocks, and other values
   // only locally. This is because Instructions already have the SSA
@@ -152,7 +153,7 @@
 /// NOTE: This is only necessary because we might select a block that uses
 /// a value before we select the block that defines the value.  It might be
 /// possible to fix this by selecting blocks in reverse postorder.
-unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) {
+unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
   if (!isa<Instruction>(I)) {
     LocalValueMap[I] = Reg;
     return Reg;
@@ -169,7 +170,7 @@
   return AssignedReg;
 }
 
-unsigned FastISel::getRegForGEPIndex(Value *Idx) {
+unsigned FastISel::getRegForGEPIndex(const Value *Idx) {
   unsigned IdxN = getRegForValue(Idx);
   if (IdxN == 0)
     // Unhandled operand. Halt "fast" selection and bail.
@@ -188,7 +189,7 @@
 /// SelectBinaryOp - Select and emit code for a binary operator instruction,
 /// which has an opcode which directly corresponds to the given ISD opcode.
 ///
-bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) {
+bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
   EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
   if (VT == MVT::Other || !VT.isSimple())
     // Unhandled type. Halt "fast" selection and bail.
@@ -254,7 +255,7 @@
   return true;
 }
 
-bool FastISel::SelectGetElementPtr(User *I) {
+bool FastISel::SelectGetElementPtr(const User *I) {
   unsigned N = getRegForValue(I->getOperand(0));
   if (N == 0)
     // Unhandled operand. Halt "fast" selection and bail.
@@ -262,9 +263,9 @@
 
   const Type *Ty = I->getOperand(0)->getType();
   MVT VT = TLI.getPointerTy();
-  for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end();
-       OI != E; ++OI) {
-    Value *Idx = *OI;
+  for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
+       E = I->op_end(); OI != E; ++OI) {
+    const Value *Idx = *OI;
     if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
       if (Field) {
@@ -282,7 +283,7 @@
       Ty = cast<SequentialType>(Ty)->getElementType();
 
       // If this is a constant subscript, handle it quickly.
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->getZExtValue() == 0) continue;
         uint64_t Offs = 
           TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
@@ -318,52 +319,56 @@
   return true;
 }
 
-bool FastISel::SelectCall(User *I) {
-  Function *F = cast<CallInst>(I)->getCalledFunction();
+bool FastISel::SelectCall(const User *I) {
+  const Function *F = cast<CallInst>(I)->getCalledFunction();
   if (!F) return false;
 
+  // Handle selected intrinsic function calls.
   unsigned IID = F->getIntrinsicID();
   switch (IID) {
   default: break;
   case Intrinsic::dbg_declare: {
-    DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
-    if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None)||!DW
-        || !DW->ShouldEmitDwarfDebug())
+    const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+    if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) ||
+        !MF.getMMI().hasDebugInfo())
       return true;
 
-    Value *Address = DI->getAddress();
+    const Value *Address = DI->getAddress();
     if (!Address)
       return true;
-    AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+    if (isa<UndefValue>(Address))
+      return true;
+    const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
     // Don't handle byval struct arguments or VLAs, for example.
+    // Note that if we have a byval struct argument, fast ISel is turned off;
+    // those are handled in SelectionDAGBuilder.
     if (!AI) break;
     DenseMap<const AllocaInst*, int>::iterator SI =
       StaticAllocaMap.find(AI);
     if (SI == StaticAllocaMap.end()) break; // VLAs.
     int FI = SI->second;
-    if (MMI) {
-      if (MDNode *Dbg = DI->getMetadata("dbg"))
-        MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg);
-    }
+    if (!DI->getDebugLoc().isUnknown())
+      MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc());
+
     // Building the map above is target independent.  Generating DBG_VALUE
     // inline is target dependent; do this now.
     (void)TargetSelectInstruction(cast<Instruction>(I));
     return true;
   }
   case Intrinsic::dbg_value: {
-    // This requires target support, but right now X86 is the only Fast target.
-    DbgValueInst *DI = cast<DbgValueInst>(I);
+    // This form of DBG_VALUE is target-independent.
+    const DbgValueInst *DI = cast<DbgValueInst>(I);
     const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
-    Value *V = DI->getValue();
+    const Value *V = DI->getValue();
     if (!V) {
       // Currently the optimizer can produce this; insert an undef to
       // help debugging.  Probably the optimizer should not do this.
       BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
                                      addMetadata(DI->getVariable());
-    } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
       BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()).
                                      addMetadata(DI->getVariable());
-    } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+    } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
       BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()).
                                      addMetadata(DI->getVariable());
     } else if (unsigned Reg = lookUpRegForValue(V)) {
@@ -402,54 +407,51 @@
     switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
     default: break;
     case TargetLowering::Expand: {
-      if (MMI) {
-        if (MBB->isLandingPad())
-          AddCatchInfo(*cast<CallInst>(I), MMI, MBB);
-        else {
+      if (MBB->isLandingPad())
+        AddCatchInfo(*cast<CallInst>(I), &MF.getMMI(), MBB);
+      else {
 #ifndef NDEBUG
-          CatchInfoLost.insert(cast<CallInst>(I));
+        CatchInfoLost.insert(cast<CallInst>(I));
 #endif
-          // FIXME: Mark exception selector register as live in.  Hack for PR1508.
-          unsigned Reg = TLI.getExceptionSelectorRegister();
-          if (Reg) MBB->addLiveIn(Reg);
-        }
-
+        // FIXME: Mark exception selector register as live in.  Hack for PR1508.
         unsigned Reg = TLI.getExceptionSelectorRegister();
-        EVT SrcVT = TLI.getPointerTy();
-        const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
-        unsigned ResultReg = createResultReg(RC);
-        bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
-                                             RC, RC);
-        assert(InsertedCopy && "Can't copy address registers!");
-        InsertedCopy = InsertedCopy;
-
-        // Cast the register to the type of the selector.
-        if (SrcVT.bitsGT(MVT::i32))
-          ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
-                                 ResultReg);
-        else if (SrcVT.bitsLT(MVT::i32))
-          ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
-                                 ISD::SIGN_EXTEND, ResultReg);
-        if (ResultReg == 0)
-          // Unhandled operand. Halt "fast" selection and bail.
-          return false;
-
-        UpdateValueMap(I, ResultReg);
-      } else {
-        unsigned ResultReg =
-          getRegForValue(Constant::getNullValue(I->getType()));
-        UpdateValueMap(I, ResultReg);
+        if (Reg) MBB->addLiveIn(Reg);
       }
+
+      unsigned Reg = TLI.getExceptionSelectorRegister();
+      EVT SrcVT = TLI.getPointerTy();
+      const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+      unsigned ResultReg = createResultReg(RC);
+      bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
+                                           RC, RC);
+      assert(InsertedCopy && "Can't copy address registers!");
+      InsertedCopy = InsertedCopy;
+
+      // Cast the register to the type of the selector.
+      if (SrcVT.bitsGT(MVT::i32))
+        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+                               ResultReg);
+      else if (SrcVT.bitsLT(MVT::i32))
+        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+                               ISD::SIGN_EXTEND, ResultReg);
+      if (ResultReg == 0)
+        // Unhandled operand. Halt "fast" selection and bail.
+        return false;
+
+      UpdateValueMap(I, ResultReg);
+
       return true;
     }
     }
     break;
   }
   }
+
+  // An arbitrary call. Bail.
   return false;
 }
 
-bool FastISel::SelectCast(User *I, unsigned Opcode) {
+bool FastISel::SelectCast(const User *I, unsigned Opcode) {
   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   EVT DstVT = TLI.getValueType(I->getType());
     
@@ -501,7 +503,7 @@
   return true;
 }
 
-bool FastISel::SelectBitCast(User *I) {
+bool FastISel::SelectBitCast(const User *I) {
   // If the bitcast doesn't change the type, just use the operand value.
   if (I->getType() == I->getOperand(0)->getType()) {
     unsigned Reg = getRegForValue(I->getOperand(0));
@@ -552,15 +554,28 @@
 }
 
 bool
-FastISel::SelectInstruction(Instruction *I) {
+FastISel::SelectInstruction(const Instruction *I) {
+  // Just before the terminator instruction, insert instructions to
+  // feed PHI nodes in successor blocks.
+  if (isa<TerminatorInst>(I))
+    if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+      return false;
+
+  DL = I->getDebugLoc();
+
   // First, try doing target-independent selection.
-  if (SelectOperator(I, I->getOpcode()))
+  if (SelectOperator(I, I->getOpcode())) {
+    DL = DebugLoc();
     return true;
+  }
 
   // Next, try calling the target to attempt to handle the instruction.
-  if (TargetSelectInstruction(I))
+  if (TargetSelectInstruction(I)) {
+    DL = DebugLoc();
     return true;
+  }
 
+  DL = DebugLoc();
   return false;
 }
 
@@ -581,7 +596,7 @@
 /// SelectFNeg - Emit an FNeg operation.
 ///
 bool
-FastISel::SelectFNeg(User *I) {
+FastISel::SelectFNeg(const User *I) {
   unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
   if (OpReg == 0) return false;
 
@@ -622,7 +637,7 @@
 }
 
 bool
-FastISel::SelectOperator(User *I, unsigned Opcode) {
+FastISel::SelectOperator(const User *I, unsigned Opcode) {
   switch (Opcode) {
   case Instruction::Add:
     return SelectBinaryOp(I, ISD::ADD);
@@ -668,10 +683,10 @@
     return SelectGetElementPtr(I);
 
   case Instruction::Br: {
-    BranchInst *BI = cast<BranchInst>(I);
+    const BranchInst *BI = cast<BranchInst>(I);
 
     if (BI->isUnconditional()) {
-      BasicBlock *LLVMSucc = BI->getSuccessor(0);
+      const BasicBlock *LLVMSucc = BI->getSuccessor(0);
       MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
       FastEmitBranch(MSucc);
       return true;
@@ -686,10 +701,6 @@
     // Nothing to emit.
     return true;
 
-  case Instruction::PHI:
-    // PHI nodes are already emitted.
-    return true;
-
   case Instruction::Alloca:
     // FunctionLowering has the static-sized case covered.
     if (StaticAllocaMap.count(cast<AllocaInst>(I)))
@@ -729,6 +740,9 @@
     return true;
   }
 
+  case Instruction::PHI:
+    llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
   default:
     // Unhandled instruction. Halt "fast" selection and bail.
     return false;
@@ -736,25 +750,23 @@
 }
 
 FastISel::FastISel(MachineFunction &mf,
-                   MachineModuleInfo *mmi,
-                   DwarfWriter *dw,
                    DenseMap<const Value *, unsigned> &vm,
                    DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
-                   DenseMap<const AllocaInst *, int> &am
+                   DenseMap<const AllocaInst *, int> &am,
+                   std::vector<std::pair<MachineInstr*, unsigned> > &pn
 #ifndef NDEBUG
-                   , SmallSet<Instruction*, 8> &cil
+                   , SmallSet<const Instruction *, 8> &cil
 #endif
                    )
   : MBB(0),
     ValueMap(vm),
     MBBMap(bm),
     StaticAllocaMap(am),
+    PHINodesToUpdate(pn),
 #ifndef NDEBUG
     CatchInfoLost(cil),
 #endif
     MF(mf),
-    MMI(mmi),
-    DW(dw),
     MRI(MF.getRegInfo()),
     MFI(*MF.getFrameInfo()),
     MCP(*MF.getConstantPool()),
@@ -787,7 +799,7 @@
 }
 
 unsigned FastISel::FastEmit_f(MVT, MVT,
-                              unsigned, ConstantFP * /*FPImm*/) {
+                              unsigned, const ConstantFP * /*FPImm*/) {
   return 0;
 }
 
@@ -799,7 +811,7 @@
 
 unsigned FastISel::FastEmit_rf(MVT, MVT,
                                unsigned, unsigned /*Op0*/,
-                               ConstantFP * /*FPImm*/) {
+                               const ConstantFP * /*FPImm*/) {
   return 0;
 }
 
@@ -832,7 +844,7 @@
 /// FastEmit_rf. If that fails, it materializes the immediate into a register
 /// and try FastEmit_rr instead.
 unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
-                                unsigned Op0, ConstantFP *FPImm,
+                                unsigned Op0, const ConstantFP *FPImm,
                                 MVT ImmType) {
   // First check if immediate type is legal. If not, we can't use the rf form.
   unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
@@ -942,7 +954,7 @@
 
 unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
-                                   unsigned Op0, ConstantFP *FPImm) {
+                                   unsigned Op0, const ConstantFP *FPImm) {
   unsigned ResultReg = createResultReg(RC);
   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 
@@ -1018,3 +1030,67 @@
 unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) {
   return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
 }
+
+/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input.  We cannot just directly add them, because expansion
+/// might result in multiple MBB's for one BB.  As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+  const TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+  unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size();
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    const BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::const_iterator I = SuccBB->begin();
+         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      // Only handle legal types. Two interesting things to note here. First,
+      // by bailing out early, we may leave behind some dead instructions,
+      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+      // own moves. Second, this check is necessary becuase FastISel doesn't
+      // use CreateRegForValue to create registers, so it always creates
+      // exactly one register for each non-void instruction.
+      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+        // Promote MVT::i1.
+        if (VT == MVT::i1)
+          VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
+        else {
+          PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+          return false;
+        }
+      }
+
+      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      unsigned Reg = getRegForValue(PHIOp);
+      if (Reg == 0) {
+        PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+        return false;
+      }
+      PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+    }
+  }
+
+  return true;
+}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 50f4c32..8fe619e 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -14,19 +14,18 @@
 
 #define DEBUG_TYPE "function-lowering-info"
 #include "FunctionLoweringInfo.h"
-#include "llvm/CallingConv.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameInfo.h"
@@ -34,99 +33,21 @@
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 using namespace llvm;
 
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
-/// of insertvalue or extractvalue indices that identify a member, return
-/// the linearized index of the start of the member.
-///
-unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
-                                  const unsigned *Indices,
-                                  const unsigned *IndicesEnd,
-                                  unsigned CurIndex) {
-  // Base case: We're done.
-  if (Indices && Indices == IndicesEnd)
-    return CurIndex;
-
-  // Given a struct type, recursively traverse the elements.
-  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
-    for (StructType::element_iterator EB = STy->element_begin(),
-                                      EI = EB,
-                                      EE = STy->element_end();
-        EI != EE; ++EI) {
-      if (Indices && *Indices == unsigned(EI - EB))
-        return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
-      CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
-    }
-    return CurIndex;
-  }
-  // Given an array type, recursively traverse the elements.
-  else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
-    const Type *EltTy = ATy->getElementType();
-    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
-      if (Indices && *Indices == i)
-        return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
-      CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
-    }
-    return CurIndex;
-  }
-  // We haven't found the type we're looking for, so keep searching.
-  return CurIndex + 1;
-}
-
-/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
-/// EVTs that represent all the individual underlying
-/// non-aggregate types that comprise it.
-///
-/// If Offsets is non-null, it points to a vector to be filled in
-/// with the in-memory offsets of each of the individual values.
-///
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
-                           SmallVectorImpl<EVT> &ValueVTs,
-                           SmallVectorImpl<uint64_t> *Offsets,
-                           uint64_t StartingOffset) {
-  // Given a struct type, recursively traverse the elements.
-  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
-    const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
-    for (StructType::element_iterator EB = STy->element_begin(),
-                                      EI = EB,
-                                      EE = STy->element_end();
-         EI != EE; ++EI)
-      ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
-                      StartingOffset + SL->getElementOffset(EI - EB));
-    return;
-  }
-  // Given an array type, recursively traverse the elements.
-  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
-    const Type *EltTy = ATy->getElementType();
-    uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
-    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
-      ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
-                      StartingOffset + i * EltSize);
-    return;
-  }
-  // Interpret void as zero return values.
-  if (Ty->isVoidTy())
-    return;
-  // Base case: we can get an EVT for this LLVM IR type.
-  ValueVTs.push_back(TLI.getValueType(Ty));
-  if (Offsets)
-    Offsets->push_back(StartingOffset);
-}
-
 /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
 /// PHI nodes or outside of the basic block that defines it, or used by a
 /// switch or atomic instruction, which may expand to multiple basic blocks.
-static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+  if (I->use_empty()) return false;
   if (isa<PHINode>(I)) return true;
-  BasicBlock *BB = I->getParent();
-  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+  const BasicBlock *BB = I->getParent();
+  for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
+        UI != E; ++UI)
     if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
       return true;
   return false;
@@ -135,7 +56,7 @@
 /// isOnlyUsedInEntryBlock - If the specified argument is only used in the
 /// entry block, return true.  This includes arguments used by switches, since
 /// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) {
   // With FastISel active, we may be splitting blocks, so force creation
   // of virtual registers for all non-dead arguments.
   // Don't force virtual registers for byval arguments though, because
@@ -143,18 +64,19 @@
   if (EnableFastISel && !A->hasByValAttr())
     return A->use_empty();
 
-  BasicBlock *Entry = A->getParent()->begin();
-  for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+  const BasicBlock *Entry = A->getParent()->begin();
+  for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+       UI != E; ++UI)
     if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
       return false;  // Use not in entry block.
   return true;
 }
 
-FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
+FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
   : TLI(tli) {
 }
 
-void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
                                bool EnableFastISel) {
   Fn = &fn;
   MF = &mf;
@@ -162,7 +84,7 @@
 
   // Create a vreg for each argument register that is not dead and is used
   // outside of the entry block for the function.
-  for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
+  for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
        AI != E; ++AI)
     if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
       InitializeRegForValue(AI);
@@ -170,10 +92,10 @@
   // Initialize the mapping of values to registers.  This is only set up for
   // instruction values that are used outside of the block that defines
   // them.
-  Function::iterator BB = Fn->begin(), EB = Fn->end();
-  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
-      if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+  Function::const_iterator BB = Fn->begin(), EB = Fn->end();
+  for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
+      if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
         const Type *Ty = AI->getAllocatedType();
         uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
         unsigned Align =
@@ -187,8 +109,8 @@
       }
 
   for (; BB != EB; ++BB)
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-      if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      if (isUsedOutsideOfDefiningBlock(I))
         if (!isa<AllocaInst>(I) ||
             !StaticAllocaMap.count(cast<AllocaInst>(I)))
           InitializeRegForValue(I);
@@ -196,7 +118,7 @@
   // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
   // also creates the initial PHI MachineInstrs, though none of the input
   // operands are populated.
-  for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
+  for (BB = Fn->begin(); BB != EB; ++BB) {
     MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
     MBBMap[BB] = MBB;
     MF->push_back(MBB);
@@ -209,14 +131,11 @@
 
     // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
     // appropriate.
-    PHINode *PN;
-    DebugLoc DL;
-    for (BasicBlock::iterator
-           I = BB->begin(), E = BB->end(); I != E; ++I) {
+    for (BasicBlock::const_iterator I = BB->begin();
+         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      if (PN->use_empty()) continue;
 
-      PN = dyn_cast<PHINode>(I);
-      if (!PN || PN->use_empty()) continue;
-
+      DebugLoc DL = PN->getDebugLoc();
       unsigned PHIReg = ValueMap[PN];
       assert(PHIReg && "PHI node does not have an assigned virtual register!");
 
@@ -232,12 +151,20 @@
       }
     }
   }
+
+  // Mark landing pad blocks.
+  for (BB = Fn->begin(); BB != EB; ++BB)
+    if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+      MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
 }
 
 /// clear - Clear out all the function-specific state. This returns this
 /// FunctionLoweringInfo to an empty state, ready to be used for a
 /// different function.
 void FunctionLoweringInfo::clear() {
+  assert(CatchInfoFound.size() == CatchInfoLost.size() &&
+         "Not all catch info was assigned to a landing pad!");
+
   MBBMap.clear();
   ValueMap.clear();
   StaticAllocaMap.clear();
@@ -277,21 +204,12 @@
   return FirstReg;
 }
 
-/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
-  V = V->stripPointerCasts();
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
-  assert ((GV || isa<ConstantPointerNull>(V)) &&
-          "TypeInfo must be a global variable or NULL");
-  return GV;
-}
-
 /// AddCatchInfo - Extract the personality and type infos from an eh.selector
 /// call, and add them to the specified machine basic block.
-void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
                         MachineBasicBlock *MBB) {
   // Inform the MachineModuleInfo of the personality for this landing pad.
-  ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+  const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
   assert(CE->getOpcode() == Instruction::BitCast &&
          isa<Function>(CE->getOperand(0)) &&
          "Personality should be a function");
@@ -299,11 +217,11 @@
 
   // Gather all the type infos for this landing pad and pass them along to
   // MachineModuleInfo.
-  std::vector<GlobalVariable *> TyInfo;
+  std::vector<const GlobalVariable *> TyInfo;
   unsigned N = I.getNumOperands();
 
   for (unsigned i = N - 1; i > 2; --i) {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
       unsigned FilterLength = CI->getZExtValue();
       unsigned FirstCatch = i + FilterLength + !FilterLength;
       assert (FirstCatch <= N && "Invalid filter length");
@@ -340,10 +258,11 @@
   }
 }
 
-void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB,
                          MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
-  for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
-    if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+  for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end();
+       I != E; ++I)
+    if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
       // Apply the catch info to DestBB.
       AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
 #ifndef NDEBUG
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
index d851e64..c855db8 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
@@ -15,12 +15,16 @@
 #ifndef FUNCTIONLOWERINGINFO_H
 #define FUNCTIONLOWERINGINFO_H
 
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
 #ifndef NDEBUG
 #include "llvm/ADT/SmallSet.h"
 #endif
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/Support/CallSite.h"
 #include <vector>
 
 namespace llvm {
@@ -31,6 +35,7 @@
 class Function;
 class GlobalVariable;
 class Instruction;
+class MachineInstr;
 class MachineBasicBlock;
 class MachineFunction;
 class MachineModuleInfo;
@@ -44,8 +49,8 @@
 ///
 class FunctionLoweringInfo {
 public:
-  TargetLowering &TLI;
-  Function *Fn;
+  const TargetLowering &TLI;
+  const Function *Fn;
   MachineFunction *MF;
   MachineRegisterInfo *RegInfo;
 
@@ -57,13 +62,6 @@
   /// allocated to hold a pointer to the hidden sret parameter.
   unsigned DemoteRegister;
 
-  explicit FunctionLoweringInfo(TargetLowering &TLI);
-
-  /// set - Initialize this FunctionLoweringInfo with the given Function
-  /// and its associated MachineFunction.
-  ///
-  void set(Function &Fn, MachineFunction &MF, bool EnableFastISel);
-
   /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
   DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
 
@@ -78,10 +76,38 @@
   DenseMap<const AllocaInst*, int> StaticAllocaMap;
 
 #ifndef NDEBUG
-  SmallSet<Instruction*, 8> CatchInfoLost;
-  SmallSet<Instruction*, 8> CatchInfoFound;
+  SmallSet<const Instruction *, 8> CatchInfoLost;
+  SmallSet<const Instruction *, 8> CatchInfoFound;
 #endif
 
+  struct LiveOutInfo {
+    unsigned NumSignBits;
+    APInt KnownOne, KnownZero;
+    LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
+  };
+  
+  /// LiveOutRegInfo - Information about live out vregs, indexed by their
+  /// register number offset by 'FirstVirtualRegister'.
+  std::vector<LiveOutInfo> LiveOutRegInfo;
+
+  /// PHINodesToUpdate - A list of phi instructions whose operand list will
+  /// be updated after processing the current basic block.
+  /// TODO: This isn't per-function state, it's per-basic-block state. But
+  /// there's no other convenient place for it to live right now.
+  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+
+  explicit FunctionLoweringInfo(const TargetLowering &TLI);
+
+  /// set - Initialize this FunctionLoweringInfo with the given Function
+  /// and its associated MachineFunction.
+  ///
+  void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel);
+
+  /// clear - Clear out all the function-specific state. This returns this
+  /// FunctionLoweringInfo to an empty state, ready to be used for a
+  /// different function.
+  void clear();
+
   unsigned MakeReg(EVT VT);
   
   /// isExportedInst - Return true if the specified value is an instruction
@@ -97,53 +123,15 @@
     assert(R == 0 && "Already initialized this value register!");
     return R = CreateRegForValue(V);
   }
-  
-  struct LiveOutInfo {
-    unsigned NumSignBits;
-    APInt KnownOne, KnownZero;
-    LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
-  };
-  
-  /// LiveOutRegInfo - Information about live out vregs, indexed by their
-  /// register number offset by 'FirstVirtualRegister'.
-  std::vector<LiveOutInfo> LiveOutRegInfo;
-
-  /// clear - Clear out all the function-specific state. This returns this
-  /// FunctionLoweringInfo to an empty state, ready to be used for a
-  /// different function.
-  void clear();
 };
 
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
-/// of insertvalue or extractvalue indices that identify a member, return
-/// the linearized index of the start of the member.
-///
-unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
-                            const unsigned *Indices,
-                            const unsigned *IndicesEnd,
-                            unsigned CurIndex = 0);
-
-/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
-/// EVTs that represent all the individual underlying
-/// non-aggregate types that comprise it.
-///
-/// If Offsets is non-null, it points to a vector to be filled in
-/// with the in-memory offsets of each of the individual values.
-///
-void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
-                     SmallVectorImpl<EVT> &ValueVTs,
-                     SmallVectorImpl<uint64_t> *Offsets = 0,
-                     uint64_t StartingOffset = 0);
-
-/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-GlobalVariable *ExtractTypeInfo(Value *V);
-
 /// AddCatchInfo - Extract the personality and type infos from an eh.selector
 /// call, and add them to the specified machine basic block.
-void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB);
+void AddCatchInfo(const CallInst &I,
+                  MachineModuleInfo *MMI, MachineBasicBlock *MBB);
 
 /// CopyCatchInfo - Copy catch information from DestBB to SrcBB.
-void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB,
                    MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
 
 } // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 625de11..f857a1c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -15,7 +15,7 @@
 
 #define DEBUG_TYPE "instr-emitter"
 #include "InstrEmitter.h"
-#include "SDDbgValue.h"
+#include "SDNodeDbgValue.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -264,7 +264,8 @@
 InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
                                  unsigned IIOpNum,
                                  const TargetInstrDesc *II,
-                                 DenseMap<SDValue, unsigned> &VRBaseMap) {
+                                 DenseMap<SDValue, unsigned> &VRBaseMap,
+                                 bool IsDebug) {
   assert(Op.getValueType() != MVT::Other &&
          Op.getValueType() != MVT::Flag &&
          "Chain and flag operands should occur at end of operand list!");
@@ -295,7 +296,11 @@
     }
   }
 
-  MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef));
+  MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
+                                           false/*isImp*/, false/*isKill*/,
+                                           false/*isDead*/, false/*isUndef*/,
+                                           false/*isEarlyClobber*/,
+                                           0/*SubReg*/, IsDebug));
 }
 
 /// AddOperand - Add the specified operand to the specified machine instr.  II
@@ -305,9 +310,10 @@
 void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
                               unsigned IIOpNum,
                               const TargetInstrDesc *II,
-                              DenseMap<SDValue, unsigned> &VRBaseMap) {
+                              DenseMap<SDValue, unsigned> &VRBaseMap,
+                              bool IsDebug) {
   if (Op.isMachineOpcode()) {
-    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
   } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
   } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
@@ -356,7 +362,7 @@
     assert(Op.getValueType() != MVT::Other &&
            Op.getValueType() != MVT::Flag &&
            "Chain and flag operands should occur at end of operand list!");
-    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
   }
 }
 
@@ -498,143 +504,163 @@
   assert(isNew && "Node emitted out of order - early");
 }
 
-/// EmitDbgValue - Generate any debug info that refers to this Node.  Constant
-/// dbg_value is not handled here.
-void
-InstrEmitter::EmitDbgValue(SDNode *Node,
-                           DenseMap<SDValue, unsigned> &VRBaseMap,
-                           SDDbgValue *sd) {
-  if (!Node->getHasDebugValue())
-    return;
-  if (!sd)
-    return;
-  unsigned VReg = getVR(SDValue(sd->getSDNode(), sd->getResNo()), VRBaseMap);
-  const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
-  DebugLoc DL = sd->getDebugLoc();
-  MachineInstr *MI;
-  if (VReg) {
-    MI = BuildMI(*MF, DL, II).addReg(VReg, RegState::Debug).
-                              addImm(sd->getOffset()).
-                              addMetadata(sd->getMDPtr());
-  } else {
-    // Insert an Undef so we can see what we dropped.
-    MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()).
-                                    addMetadata(sd->getMDPtr());
-  }
-  MBB->insert(InsertPos, MI);
-}
-
-/// EmitDbgValue - Generate constant debug info.  No SDNode is involved.
-void
-InstrEmitter::EmitDbgValue(SDDbgValue *sd) {
-  if (!sd)
-    return;
-  const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
-  DebugLoc DL = sd->getDebugLoc();
-  MachineInstr *MI;
-  Value *V = sd->getConst();
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()).
-                                   addImm(sd->getOffset()).
-                                   addMetadata(sd->getMDPtr());
-  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
-    MI = BuildMI(*MF, DL, II).addFPImm(CF).addImm(sd->getOffset()).
-                                   addMetadata(sd->getMDPtr());
-  } else {
-    // Insert an Undef so we can see what we dropped.
-    MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()).
-                                    addMetadata(sd->getMDPtr());
-  }
-  MBB->insert(InsertPos, MI);
-}
-
-/// EmitNode - Generate machine code for a node and needed dependencies.
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
 ///
-void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
-                            DenseMap<SDValue, unsigned> &VRBaseMap,
+MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+                                         DenseMap<SDValue, unsigned> &VRBaseMap,
                          DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
-  // If machine instruction
-  if (Node->isMachineOpcode()) {
-    unsigned Opc = Node->getMachineOpcode();
-    
-    // Handle subreg insert/extract specially
-    if (Opc == TargetOpcode::EXTRACT_SUBREG || 
-        Opc == TargetOpcode::INSERT_SUBREG ||
-        Opc == TargetOpcode::SUBREG_TO_REG) {
-      EmitSubregNode(Node, VRBaseMap);
-      return;
-    }
+  uint64_t Offset = SD->getOffset();
+  MDNode* MDPtr = SD->getMDPtr();
+  DebugLoc DL = SD->getDebugLoc();
 
-    // Handle COPY_TO_REGCLASS specially.
-    if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
-      EmitCopyToRegClassNode(Node, VRBaseMap);
-      return;
+  if (SD->getKind() == SDDbgValue::FRAMEIX) {
+    // Stack address; this needs to be lowered in target-dependent fashion.
+    // EmitTargetCodeForFrameDebugValue is responsible for allocation.
+    unsigned FrameIx = SD->getFrameIx();
+    return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL);
+  }
+  // Otherwise, we're going to create an instruction here.
+  const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+  MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+  if (SD->getKind() == SDDbgValue::SDNODE) {
+    SDNode *Node = SD->getSDNode();
+    SDValue Op = SDValue(Node, SD->getResNo());
+    // It's possible we replaced this SDNode with other(s) and therefore
+    // didn't generate code for it.  It's better to catch these cases where
+    // they happen and transfer the debug info, but trying to guarantee that
+    // in all cases would be very fragile; this is a safeguard for any
+    // that were missed.
+    DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+    if (I==VRBaseMap.end())
+      MIB.addReg(0U);       // undef
+    else
+      AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+                 true /*IsDebug*/);
+  } else if (SD->getKind() == SDDbgValue::CONST) {
+    const Value *V = SD->getConst();
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+      MIB.addImm(CI->getSExtValue());
+    } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+      MIB.addFPImm(CF);
+    } else {
+      // Could be an Undef.  In any case insert an Undef so we can see what we
+      // dropped.
+      MIB.addReg(0U);
     }
+  } else {
+    // Insert an Undef so we can see what we dropped.
+    MIB.addReg(0U);
+  }
 
-    if (Opc == TargetOpcode::IMPLICIT_DEF)
-      // We want a unique VR for each IMPLICIT_DEF use.
-      return;
-    
-    const TargetInstrDesc &II = TII->get(Opc);
-    unsigned NumResults = CountResults(Node);
-    unsigned NodeOperands = CountOperands(Node);
-    bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
-                          II.getImplicitDefs() != 0;
+  MIB.addImm(Offset).addMetadata(MDPtr);
+  return &*MIB;
+}
+
+/// EmitMachineNode - Generate machine code for a target-specific node and
+/// needed dependencies.
+///
+void InstrEmitter::
+EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+                DenseMap<SDValue, unsigned> &VRBaseMap,
+                DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+  unsigned Opc = Node->getMachineOpcode();
+  
+  // Handle subreg insert/extract specially
+  if (Opc == TargetOpcode::EXTRACT_SUBREG || 
+      Opc == TargetOpcode::INSERT_SUBREG ||
+      Opc == TargetOpcode::SUBREG_TO_REG) {
+    EmitSubregNode(Node, VRBaseMap);
+    return;
+  }
+
+  // Handle COPY_TO_REGCLASS specially.
+  if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+    EmitCopyToRegClassNode(Node, VRBaseMap);
+    return;
+  }
+
+  if (Opc == TargetOpcode::IMPLICIT_DEF)
+    // We want a unique VR for each IMPLICIT_DEF use.
+    return;
+  
+  const TargetInstrDesc &II = TII->get(Opc);
+  unsigned NumResults = CountResults(Node);
+  unsigned NodeOperands = CountOperands(Node);
+  bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
 #ifndef NDEBUG
-    unsigned NumMIOperands = NodeOperands + NumResults;
-    assert((II.getNumOperands() == NumMIOperands ||
-            HasPhysRegOuts || II.isVariadic()) &&
-           "#operands for dag node doesn't match .td file!"); 
+  unsigned NumMIOperands = NodeOperands + NumResults;
+  if (II.isVariadic())
+    assert(NumMIOperands >= II.getNumOperands() &&
+           "Too few operands for a variadic node!");
+  else
+    assert(NumMIOperands >= II.getNumOperands() &&
+           NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
+           "#operands for dag node doesn't match .td file!");
 #endif
 
-    // Create the new machine instruction.
-    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
-    
-    // Add result register values for things that are defined by this
-    // instruction.
-    if (NumResults)
-      CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
-    
-    // Emit all of the actual operands of this instruction, adding them to the
-    // instruction as appropriate.
-    bool HasOptPRefs = II.getNumDefs() > NumResults;
-    assert((!HasOptPRefs || !HasPhysRegOuts) &&
-           "Unable to cope with optional defs and phys regs defs!");
-    unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
-    for (unsigned i = NumSkip; i != NodeOperands; ++i)
-      AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
-                 VRBaseMap);
+  // Create the new machine instruction.
+  MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+  
+  // Add result register values for things that are defined by this
+  // instruction.
+  if (NumResults)
+    CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+  
+  // Emit all of the actual operands of this instruction, adding them to the
+  // instruction as appropriate.
+  bool HasOptPRefs = II.getNumDefs() > NumResults;
+  assert((!HasOptPRefs || !HasPhysRegOuts) &&
+         "Unable to cope with optional defs and phys regs defs!");
+  unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+  for (unsigned i = NumSkip; i != NodeOperands; ++i)
+    AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+               VRBaseMap);
 
-    // Transfer all of the memory reference descriptions of this instruction.
-    MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
-                   cast<MachineSDNode>(Node)->memoperands_end());
+  // Transfer all of the memory reference descriptions of this instruction.
+  MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+                 cast<MachineSDNode>(Node)->memoperands_end());
 
-    if (II.usesCustomInsertionHook()) {
-      // Insert this instruction into the basic block using a target
-      // specific inserter which may returns a new basic block.
-      MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
-      InsertPos = MBB->end();
-    } else {
-      MBB->insert(InsertPos, MI);
-    }
-
-    // Additional results must be an physical register def.
-    if (HasPhysRegOuts) {
-      for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
-        unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
-        if (Node->hasAnyUseOfValue(i))
-          EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
-        // If there are no uses, mark the register as dead now, so that
-        // MachineLICM/Sink can see that it's dead. Don't do this if the
-        // node has a Flag value, for the benefit of targets still using
-        // Flag for values in physregs.
-        else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
-          MI->addRegisterDead(Reg, TRI);
-      }
-    }
+  if (II.usesCustomInsertionHook()) {
+    // Insert this instruction into the basic block using a target
+    // specific inserter which may returns a new basic block.
+    MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
+    InsertPos = MBB->end();
     return;
   }
+  
+  MBB->insert(InsertPos, MI);
 
+  // Additional results must be an physical register def.
+  if (HasPhysRegOuts) {
+    for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+      unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+      if (Node->hasAnyUseOfValue(i))
+        EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+      // If there are no uses, mark the register as dead now, so that
+      // MachineLICM/Sink can see that it's dead. Don't do this if the
+      // node has a Flag value, for the benefit of targets still using
+      // Flag for values in physregs.
+      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+        MI->addRegisterDead(Reg, TRI);
+    }
+  }
+  
+  // If the instruction has implicit defs and the node doesn't, mark the
+  // implicit def as dead.  If the node has any flag outputs, we don't do this
+  // because we don't know what implicit defs are being used by flagged nodes.
+  if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+    if (const unsigned *IDList = II.getImplicitDefs()) {
+      for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
+           i != e; ++i)
+        MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
+    }
+}
+
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+                DenseMap<SDValue, unsigned> &VRBaseMap) {
   switch (Node->getOpcode()) {
   default:
 #ifndef NDEBUG
@@ -684,6 +710,13 @@
     EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
     break;
   }
+  case ISD::EH_LABEL: {
+    MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+    BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+            TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+    break;
+  }
+      
   case ISD::INLINEASM: {
     unsigned NumOps = Node->getNumOperands();
     if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
@@ -694,12 +727,12 @@
                                TII->get(TargetOpcode::INLINEASM));
 
     // Add the asm string as an external symbol operand.
-    const char *AsmStr =
-      cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+    SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+    const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
     MI->addOperand(MachineOperand::CreateES(AsmStr));
       
     // Add all of the operand registers to the instruction.
-    for (unsigned i = 2; i != NumOps;) {
+    for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
       unsigned Flags =
         cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
       unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
@@ -707,24 +740,24 @@
       MI->addOperand(MachineOperand::CreateImm(Flags));
       ++i;  // Skip the ID value.
         
-      switch (Flags & 7) {
+      switch (InlineAsm::getKind(Flags)) {
       default: llvm_unreachable("Bad flags!");
-      case 2:   // Def of register.
+        case InlineAsm::Kind_RegDef:
         for (; NumVals; --NumVals, ++i) {
           unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
           MI->addOperand(MachineOperand::CreateReg(Reg, true));
         }
         break;
-      case 6:   // Def of earlyclobber register.
+      case InlineAsm::Kind_RegDefEarlyClobber:
         for (; NumVals; --NumVals, ++i) {
           unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
           MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, 
                                                    false, false, true));
         }
         break;
-      case 1:  // Use of register.
-      case 3:  // Immediate.
-      case 4:  // Addressing mode.
+      case InlineAsm::Kind_RegUse:  // Use of register.
+      case InlineAsm::Kind_Imm:  // Immediate.
+      case InlineAsm::Kind_Mem:  // Addressing mode.
         // The addressing mode has been selected, just add all of the
         // operands to the machine instruction.
         for (; NumVals; --NumVals, ++i)
@@ -732,6 +765,13 @@
         break;
       }
     }
+    
+    // Get the mdnode from the asm if it exists and add it to the instruction.
+    SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+    const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+    if (MD)
+      MI->addOperand(MachineOperand::CreateMetadata(MD));
+    
     MBB->insert(InsertPos, MI);
     break;
   }
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 4fe9f19..f8fd6ef 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -64,7 +64,8 @@
   void AddRegisterOperand(MachineInstr *MI, SDValue Op,
                           unsigned IIOpNum,
                           const TargetInstrDesc *II,
-                          DenseMap<SDValue, unsigned> &VRBaseMap);
+                          DenseMap<SDValue, unsigned> &VRBaseMap,
+                          bool IsDebug = false);
 
   /// AddOperand - Add the specified operand to the specified machine instr.  II
   /// specifies the instruction information for the node, and IIOpNum is the
@@ -73,7 +74,8 @@
   void AddOperand(MachineInstr *MI, SDValue Op,
                   unsigned IIOpNum,
                   const TargetInstrDesc *II,
-                  DenseMap<SDValue, unsigned> &VRBaseMap);
+                  DenseMap<SDValue, unsigned> &VRBaseMap,
+                  bool IsDebug = false);
 
   /// EmitSubregNode - Generate machine code for subreg nodes.
   ///
@@ -98,21 +100,22 @@
   /// MachineInstr.
   static unsigned CountOperands(SDNode *Node);
 
-  /// EmitDbgValue - Generate any debug info that refers to this Node.  Constant
-  /// dbg_value is not handled here.
-  void EmitDbgValue(SDNode *Node,
-                    DenseMap<SDValue, unsigned> &VRBaseMap,
-                    SDDbgValue* sd);
-
-
-  /// EmitDbgValue - Generate a constant DBG_VALUE.  No node is involved.
-  void EmitDbgValue(SDDbgValue* sd);
+  /// EmitDbgValue - Generate machine instruction for a dbg_value node.
+  ///
+  MachineInstr *EmitDbgValue(SDDbgValue *SD,
+                          DenseMap<SDValue, unsigned> &VRBaseMap,
+                          DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
 
   /// EmitNode - Generate machine code for a node and needed dependencies.
   ///
   void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
                 DenseMap<SDValue, unsigned> &VRBaseMap,
-                DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+                DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+    if (Node->isMachineOpcode())
+      EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap, EM);
+    else
+      EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
+  }
 
   /// getBlock - Return the current basic block.
   MachineBasicBlock *getBlock() { return MBB; }
@@ -123,6 +126,13 @@
   /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
   /// at the given position in the given block.
   InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+  
+private:
+  void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+                       DenseMap<SDValue, unsigned> &VRBaseMap,
+                       DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+  void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+                       DenseMap<SDValue, unsigned> &VRBaseMap);
 };
 
 }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f498263..bedfa57 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -16,7 +16,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Target/TargetFrameInfo.h"
@@ -33,13 +32,11 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include <map>
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -56,7 +53,8 @@
 ///
 namespace {
 class SelectionDAGLegalize {
-  TargetLowering &TLI;
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
   SelectionDAG &DAG;
   CodeGenOpt::Level OptLevel;
 
@@ -214,7 +212,8 @@
 
 SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
                                            CodeGenOpt::Level ol)
-  : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
+  : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+    DAG(dag), OptLevel(ol),
     ValueTypeActions(TLI.getValueTypeActions()) {
   assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
          "Too many value types for ValueTypeActions to hold!");
@@ -410,7 +409,9 @@
       // to the final destination using (unaligned) integer loads and stores.
       EVT StoredVT = ST->getMemoryVT();
       EVT RegVT =
-        TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits()));
+        TLI.getRegisterType(*DAG.getContext(),
+                            EVT::getIntegerVT(*DAG.getContext(),
+                                              StoredVT.getSizeInBits()));
       unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
       unsigned RegBytes = RegVT.getSizeInBits() / 8;
       unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
@@ -446,7 +447,8 @@
       // The last store may be partial.  Do a truncating store.  On big-endian
       // machines this requires an extending load from the stack slot to ensure
       // that the bits are in the right place.
-      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
+      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+                                    8 * (StoredBytes - Offset));
 
       // Load from the stack slot.
       SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
@@ -549,7 +551,8 @@
       }
 
       // The last copy may be partial.  Do an extending load.
-      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset));
+      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+                                    8 * (LoadedBytes - Offset));
       SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                                     LD->getSrcValue(), SVOffset + Offset,
                                     MemVT, LD->isVolatile(),
@@ -969,11 +972,11 @@
     Node->dump( &DAG);
     dbgs() << "\n";
 #endif
-    llvm_unreachable("Do not know how to legalize this operator!");
+    assert(0 && "Do not know how to legalize this operator!");
 
   case ISD::BUILD_VECTOR:
     switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
-    default: llvm_unreachable("This action is not supported yet!");
+    default: assert(0 && "This action is not supported yet!");
     case TargetLowering::Custom:
       Tmp3 = TLI.LowerOperation(Result, DAG);
       if (Tmp3.getNode()) {
@@ -1090,7 +1093,7 @@
       Tmp4 = Result.getValue(1);
 
       switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
-      default: llvm_unreachable("This action is not supported yet!");
+      default: assert(0 && "This action is not supported yet!");
       case TargetLowering::Legal:
         // If this is an unaligned load and the target doesn't support it,
         // expand it.
@@ -1260,7 +1263,7 @@
         Tmp2 = LegalizeOp(Ch);
       } else {
         switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
-        default: llvm_unreachable("This action is not supported yet!");
+        default: assert(0 && "This action is not supported yet!");
         case TargetLowering::Custom:
           isCustom = true;
           // FALLTHROUGH
@@ -1358,7 +1361,7 @@
 
         EVT VT = Tmp3.getValueType();
         switch (TLI.getOperationAction(ISD::STORE, VT)) {
-        default: llvm_unreachable("This action is not supported yet!");
+        default: assert(0 && "This action is not supported yet!");
         case TargetLowering::Legal:
           // If this is an unaligned store and the target doesn't support it,
           // expand it.
@@ -1395,7 +1398,8 @@
         // Promote to a byte-sized store with upper bits zero if not
         // storing an integral number of bytes.  For example, promote
         // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
-        EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits());
+        EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+                                    StVT.getStoreSizeInBits());
         Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
         Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
                                    SVOffset, NVT, isVolatile, isNonTemporal,
@@ -1460,7 +1464,7 @@
                                           ST->getOffset());
 
         switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
-        default: llvm_unreachable("This action is not supported yet!");
+        default: assert(0 && "This action is not supported yet!");
         case TargetLowering::Legal:
           // If this is an unaligned store and the target doesn't support it,
           // expand it.
@@ -1585,35 +1589,51 @@
   DebugLoc dl = Node->getDebugLoc();
   SDValue Tmp1 = Node->getOperand(0);
   SDValue Tmp2 = Node->getOperand(1);
-  assert((Tmp2.getValueType() == MVT::f32 ||
-          Tmp2.getValueType() == MVT::f64) &&
-          "Ugly special-cased code!");
-  // Get the sign bit of the RHS.
+
+  // Get the sign bit of the RHS.  First obtain a value that has the same
+  // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
   SDValue SignBit;
-  EVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
+  EVT FloatVT = Tmp2.getValueType();
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
   if (isTypeLegal(IVT)) {
+    // Convert to an integer with the same sign bit.
     SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
   } else {
-    assert(isTypeLegal(TLI.getPointerTy()) &&
-            (TLI.getPointerTy() == MVT::i32 || 
-            TLI.getPointerTy() == MVT::i64) &&
-            "Legal type for load?!");
-    SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType());
-    SDValue StorePtr = StackPtr, LoadPtr = StackPtr;
+    // Store the float to memory, then load the sign part out as an integer.
+    MVT LoadTy = TLI.getPointerTy();
+    // First create a temporary that is aligned for both the load and store.
+    SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+    // Then store the float to it.
     SDValue Ch =
-      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0,
+      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0,
                    false, false, 0);
-    if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian())
-      LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
-                            LoadPtr, DAG.getIntPtrConstant(4));
-    SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(),
-                             Ch, LoadPtr, NULL, 0, MVT::i32,
-                             false, false, 0);
+    if (TLI.isBigEndian()) {
+      assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+      // Load out a legal integer with the same sign bit as the float.
+      SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0);
+    } else { // Little endian
+      SDValue LoadPtr = StackPtr;
+      // The float may be wider than the integer we are going to load.  Advance
+      // the pointer so that the loaded integer will contain the sign bit.
+      unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
+      unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
+      LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
+                            LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+      // Load a legal integer containing the sign bit.
+      SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0);
+      // Move the sign bit to the top bit of the loaded integer.
+      unsigned BitShift = LoadTy.getSizeInBits() -
+        (FloatVT.getSizeInBits() - 8 * ByteOffset);
+      assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
+      if (BitShift)
+        SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
+                              DAG.getConstant(BitShift,TLI.getShiftAmountTy()));
+    }
   }
-  SignBit =
-      DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
-                    SignBit, DAG.getConstant(0, SignBit.getValueType()),
-                    ISD::SETLT);
+  // Now get the sign bit proper, by seeing whether the value is negative.
+  SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+                         SignBit, DAG.getConstant(0, SignBit.getValueType()),
+                         ISD::SETLT);
   // Get the absolute value of the result.
   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
   // Select between the nabs and abs value based on the sign bit of
@@ -1643,8 +1663,7 @@
   SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
   Chain = SP.getValue(1);
   unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
-  unsigned StackAlign =
-    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
   if (Align > StackAlign)
     SP = DAG.getNode(ISD::AND, dl, VT, SP,
                       DAG.getConstant(-(uint64_t)Align, VT));
@@ -1668,7 +1687,7 @@
   EVT OpVT = LHS.getValueType();
   ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
   switch (TLI.getCondCodeAction(CCCode, OpVT)) {
-  default: llvm_unreachable("Unknown condition code action!");
+  default: assert(0 && "Unknown condition code action!");
   case TargetLowering::Legal:
     // Nothing to do.
     break;
@@ -1676,7 +1695,7 @@
     ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
     unsigned Opc = 0;
     switch (CCCode) {
-    default: llvm_unreachable("Don't know how to expand this condition!");
+    default: assert(0 && "Don't know how to expand this condition!");
     case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO;  Opc = ISD::AND; break;
     case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
     case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
@@ -1723,8 +1742,8 @@
   unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
   unsigned SlotSize = SlotVT.getSizeInBits();
   unsigned DestSize = DestVT.getSizeInBits();
-  unsigned DestAlign =
-    TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext()));
+  const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+  unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
 
   // Emit a store to the stack slot.  Use a truncstore if the input value is
   // later than DestVT.
@@ -1915,7 +1934,7 @@
                                               RTLIB::Libcall Call_PPCF128) {
   RTLIB::Libcall LC;
   switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
-  default: llvm_unreachable("Unexpected request for libcall!");
+  default: assert(0 && "Unexpected request for libcall!");
   case MVT::f32: LC = Call_F32; break;
   case MVT::f64: LC = Call_F64; break;
   case MVT::f80: LC = Call_F80; break;
@@ -1932,7 +1951,7 @@
                                                RTLIB::Libcall Call_I128) {
   RTLIB::Libcall LC;
   switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
-  default: llvm_unreachable("Unexpected request for libcall!");
+  default: assert(0 && "Unexpected request for libcall!");
   case MVT::i8:   LC = Call_I8; break;
   case MVT::i16:  LC = Call_I16; break;
   case MVT::i32:  LC = Call_I32; break;
@@ -2047,7 +2066,7 @@
   // offset depending on the data type.
   uint64_t FF;
   switch (Op0.getValueType().getSimpleVT().SimpleTy) {
-  default: llvm_unreachable("Unsupported integer type!");
+  default: assert(0 && "Unsupported integer type!");
   case MVT::i8 : FF = 0x43800000ULL; break;  // 2^8  (as a float)
   case MVT::i16: FF = 0x47800000ULL; break;  // 2^16 (as a float)
   case MVT::i32: FF = 0x4F800000ULL; break;  // 2^32 (as a float)
@@ -2167,7 +2186,7 @@
   EVT SHVT = TLI.getShiftAmountTy();
   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
   switch (VT.getSimpleVT().SimpleTy) {
-  default: llvm_unreachable("Unhandled Expand type in BSWAP!");
+  default: assert(0 && "Unhandled Expand type in BSWAP!");
   case MVT::i16:
     Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
     Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
@@ -2212,7 +2231,7 @@
 SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
                                              DebugLoc dl) {
   switch (Opc) {
-  default: llvm_unreachable("Cannot expand this yet!");
+  default: assert(0 && "Cannot expand this yet!");
   case ISD::CTPOP: {
     static const uint64_t mask[6] = {
       0x5555555555555555ULL, 0x3333333333333333ULL,
@@ -2318,10 +2337,10 @@
     EVT VT = Node->getValueType(0);
     if (VT.isInteger())
       Results.push_back(DAG.getConstant(0, VT));
-    else if (VT.isFloatingPoint())
+    else {
+      assert(VT.isFloatingPoint() && "Unknown value type!");
       Results.push_back(DAG.getConstantFP(0, VT));
-    else
-      llvm_unreachable("Unknown value type!");
+    }
     break;
   }
   case ISD::TRAP: {
@@ -2416,7 +2435,7 @@
     // Increment the pointer, VAList, to the next vaarg
     Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
                        DAG.getConstant(TLI.getTargetData()->
-                                       getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
+                          getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
                                        TLI.getPointerTy()));
     // Store the incremented VAList to the legalized pointer
     Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0,
@@ -2620,6 +2639,12 @@
     Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
                                       RTLIB::REM_F80, RTLIB::REM_PPCF128));
     break;
+  case ISD::FP16_TO_FP32:
+    Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+    break;
+  case ISD::FP32_TO_FP16:
+    Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
+    break;
   case ISD::ConstantFP: {
     ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
     // Check to see if this FP immediate is already legal.
@@ -2821,7 +2846,14 @@
       BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                                RHS);
       TopHalf = BottomHalf.getValue(1);
-    } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) {
+    } else {
+      // FIXME: We should be able to fall back to a libcall with an illegal
+      // type in some cases.
+      // Also, we can fall back to a division in some cases, but that's a big
+      // performance hit in the general case.
+      assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+                                               VT.getSizeInBits() * 2)) &&
+             "Don't know how to expand this operation yet!");
       EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
       LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
       RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
@@ -2830,12 +2862,6 @@
                                DAG.getIntPtrConstant(0));
       TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
                             DAG.getIntPtrConstant(1));
-    } else {
-      // FIXME: We should be able to fall back to a libcall with an illegal
-      // type in some cases.
-      // Also, we can fall back to a division in some cases, but that's a big
-      // performance hit in the general case.
-      llvm_unreachable("Don't know how to expand this operation yet!");
     }
     if (isSigned) {
       Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
@@ -2895,7 +2921,7 @@
                                 PseudoSourceValue::getJumpTable(), 0, MemVT,
                                 false, false, 0);
     Addr = LD;
-    if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    if (TM.getRelocationModel() == Reloc::PIC_) {
       // For PIC, the sequence is:
       // BRIND(load(Jumptable + index) + RelocBase)
       // RelocBase can be JumpTable, GOT or some sort of global base.
@@ -3057,11 +3083,10 @@
     if (OVT.isVector()) {
       ExtOp   = ISD::BIT_CONVERT;
       TruncOp = ISD::BIT_CONVERT;
-    } else if (OVT.isInteger()) {
+    } else {
+      assert(OVT.isInteger() && "Cannot promote logic operation");
       ExtOp   = ISD::ANY_EXTEND;
       TruncOp = ISD::TRUNCATE;
-    } else {
-      llvm_report_error("Cannot promote logic operation");
     }
     // Promote each of the values to the new type.
     Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 35a7c7c..e3eb949 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -79,6 +79,7 @@
     case ISD::FNEG:        R = SoftenFloatRes_FNEG(N); break;
     case ISD::FP_EXTEND:   R = SoftenFloatRes_FP_EXTEND(N); break;
     case ISD::FP_ROUND:    R = SoftenFloatRes_FP_ROUND(N); break;
+    case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
     case ISD::FPOW:        R = SoftenFloatRes_FPOW(N); break;
     case ISD::FPOWI:       R = SoftenFloatRes_FPOWI(N); break;
     case ISD::FREM:        R = SoftenFloatRes_FREM(N); break;
@@ -108,14 +109,16 @@
 SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
   // Convert the inputs to integers, and build a new pair out of them.
   return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
-                     TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+                     TLI.getTypeToTransformTo(*DAG.getContext(),
+                                              N->getValueType(0)),
                      BitConvertToInteger(N->getOperand(0)),
                      BitConvertToInteger(N->getOperand(1)));
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
   return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
-                         TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+                         TLI.getTypeToTransformTo(*DAG.getContext(),
+                                                  N->getValueType(0)));
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -332,6 +335,15 @@
   return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
 }
 
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = N->getOperand(0);
+  return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
+                     N->getDebugLoc());
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Op = N->getOperand(0);
@@ -480,7 +492,8 @@
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
-  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+                                               N->getValueType(0)));
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
@@ -522,7 +535,8 @@
   // Sign/zero extend the argument if the libcall takes a larger type.
   SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
                            NVT, N->getOperand(0));
-  return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl);
+  return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+                     &Op, 1, false, dl);
 }
 
 
@@ -548,6 +562,7 @@
   case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
   case ISD::FP_TO_SINT:  Res = SoftenFloatOp_FP_TO_SINT(N); break;
   case ISD::FP_TO_UINT:  Res = SoftenFloatOp_FP_TO_UINT(N); break;
+  case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
   case ISD::SELECT_CC:   Res = SoftenFloatOp_SELECT_CC(N); break;
   case ISD::SETCC:       Res = SoftenFloatOp_SETCC(N); break;
   case ISD::STORE:       Res = SoftenFloatOp_STORE(N, OpNo); break;
@@ -704,6 +719,13 @@
   return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
   SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
   ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
@@ -800,6 +822,7 @@
   case ISD::FABS:       ExpandFloatRes_FABS(N, Lo, Hi); break;
   case ISD::FADD:       ExpandFloatRes_FADD(N, Lo, Hi); break;
   case ISD::FCEIL:      ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+  case ISD::FCOPYSIGN:  ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
   case ISD::FCOS:       ExpandFloatRes_FCOS(N, Lo, Hi); break;
   case ISD::FDIV:       ExpandFloatRes_FDIV(N, Lo, Hi); break;
   case ISD::FEXP:       ExpandFloatRes_FEXP(N, Lo, Hi); break;
@@ -873,6 +896,17 @@
   GetPairElements(Call, Lo, Hi);
 }
 
+void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
+                                                SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::COPYSIGN_F32,
+                                         RTLIB::COPYSIGN_F64,
+                                         RTLIB::COPYSIGN_F80,
+                                         RTLIB::COPYSIGN_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
                                            SDValue &Lo, SDValue &Hi) {
   SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
@@ -1374,7 +1408,8 @@
   SDValue Chain = ST->getChain();
   SDValue Ptr = ST->getBasePtr();
 
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType());
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                     ST->getValue().getValueType());
   assert(NVT.isByteSized() && "Expanded type not byte sized!");
   assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 81f28ad..548454c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -80,6 +80,8 @@
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:  Res = PromoteIntRes_FP_TO_XINT(N); break;
 
+  case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
+
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR:
@@ -202,7 +204,8 @@
       std::swap(Lo, Hi);
 
     InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
-                       EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()),
+                       EVT::getIntegerVT(*DAG.getContext(),
+                                         NOutVT.getSizeInBits()),
                        JoinIntegers(Lo, Hi));
     return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
   }
@@ -324,6 +327,16 @@
                      NVT, Res, DAG.getValueType(N->getValueType(0)));
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+
+  return DAG.getNode(ISD::AssertZext, dl,
+                     NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   DebugLoc dl = N->getDebugLoc();
@@ -452,7 +465,7 @@
 
 SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
   return DAG.getNode(ISD::SHL, N->getDebugLoc(),
-                     TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+                TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
                      GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
 }
 
@@ -543,7 +556,8 @@
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
-  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+                                               N->getValueType(0)));
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
@@ -634,6 +648,7 @@
   case ISD::STORE:        Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
                                                    OpNo); break;
   case ISD::TRUNCATE:     Res = PromoteIntOp_TRUNCATE(N); break;
+  case ISD::FP16_TO_FP32:
   case ISD::UINT_TO_FP:   Res = PromoteIntOp_UINT_TO_FP(N); break;
   case ISD::ZERO_EXTEND:  Res = PromoteIntOp_ZERO_EXTEND(N); break;
 
@@ -1370,7 +1385,8 @@
 
   if (NVTBits < EVTBits) {
     Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
-                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits)));
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        EVTBits - NVTBits)));
   } else {
     Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
     // The high part replicates the sign bit of Lo, make it explicit.
@@ -1390,7 +1406,8 @@
 
   if (NVTBits < EVTBits) {
     Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
-                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits)));
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        EVTBits - NVTBits)));
   } else {
     Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
     // The high part must be zero, make it explicit.
@@ -1833,7 +1850,8 @@
     unsigned ExcessBits =
       Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
     Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
-                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits)));
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        ExcessBits)));
   }
 }
 
@@ -1955,7 +1973,8 @@
     SplitInteger(Res, Lo, Hi);
     unsigned ExcessBits =
       Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
-    Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits));
+    Hi = DAG.getZeroExtendInReg(Hi, dl,
+                                EVT::getIntegerVT(*DAG.getContext(), ExcessBits));
   }
 }
 
@@ -2256,7 +2275,8 @@
     unsigned EBytes = ExtVT.getStoreSize();
     unsigned IncrementSize = NVT.getSizeInBits()/8;
     unsigned ExcessBits = (EBytes - IncrementSize)*8;
-    EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits);
+    EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+                                 ExtVT.getSizeInBits() - ExcessBits);
 
     if (ExcessBits < NVT.getSizeInBits()) {
       // Transfer high bits from the top of Lo to the bottom of Hi.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index f3e7ca4..17f131b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -721,7 +721,8 @@
 }
 
 void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
-  assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
          "Invalid type for promoted integer");
   AnalyzeNewValue(Result);
 
@@ -731,7 +732,8 @@
 }
 
 void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
-  assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
          "Invalid type for softened float");
   AnalyzeNewValue(Result);
 
@@ -762,7 +764,8 @@
 
 void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
                                           SDValue Hi) {
-  assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+  assert(Lo.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
          Hi.getValueType() == Lo.getValueType() &&
          "Invalid type for expanded integer");
   // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
@@ -788,7 +791,8 @@
 
 void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
                                         SDValue Hi) {
-  assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+  assert(Lo.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
          Hi.getValueType() == Lo.getValueType() &&
          "Invalid type for expanded float");
   // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
@@ -832,7 +836,8 @@
 }
 
 void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
-  assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
          "Invalid type for widened vector");
   AnalyzeNewValue(Result);
 
@@ -940,7 +945,8 @@
   } else {
     unsigned NumElements = InVT.getVectorNumElements();
     assert(!(NumElements & 1) && "Splitting vector, but not in half!");
-    LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2);
+    LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+                                   InVT.getVectorElementType(), NumElements/2);
   }
 }
 
@@ -980,7 +986,8 @@
   DebugLoc dlLo = Lo.getDebugLoc();
   EVT LVT = Lo.getValueType();
   EVT HVT = Hi.getValueType();
-  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits());
+  EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+                              LVT.getSizeInBits() + HVT.getSizeInBits());
 
   Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
   Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
@@ -1082,7 +1089,8 @@
 /// type half the size of Op's.
 void DAGTypeLegalizer::SplitInteger(SDValue Op,
                                     SDValue &Lo, SDValue &Hi) {
-  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Op.getValueType().getSizeInBits()/2);
+  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+                                 Op.getValueType().getSizeInBits()/2);
   SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
 }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b0af357..d60ad60 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -33,7 +33,7 @@
 /// into small values.
 ///
 class VISIBILITY_HIDDEN DAGTypeLegalizer {
-  TargetLowering &TLI;
+  const TargetLowering &TLI;
   SelectionDAG &DAG;
 public:
   // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
@@ -257,6 +257,7 @@
   SDValue PromoteIntRes_CTTZ(SDNode *N);
   SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+  SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
   SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
   SDValue PromoteIntRes_LOAD(LoadSDNode *N);
   SDValue PromoteIntRes_Overflow(SDNode *N);
@@ -406,6 +407,7 @@
   SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
   SDValue SoftenFloatRes_FNEG(SDNode *N);
   SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+  SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
   SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
   SDValue SoftenFloatRes_FPOW(SDNode *N);
   SDValue SoftenFloatRes_FPOWI(SDNode *N);
@@ -429,6 +431,7 @@
   SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
   SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
   SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+  SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
   SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
   SDValue SoftenFloatOp_SETCC(SDNode *N);
   SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -455,6 +458,7 @@
   void ExpandFloatRes_FABS      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FADD      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FCEIL     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FCOS      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FDIV      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FEXP      (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 5e83b4b..88e1e62 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -173,8 +173,9 @@
   EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
 
   SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
-                                 EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts),
-                                 OldVec);
+                               EVT::getVectorVT(*DAG.getContext(),
+                                                NewVT, 2*OldElts),
+                               OldVec);
 
   // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
   SDValue Idx = N->getOperand(1);
@@ -268,7 +269,9 @@
     // is no point, and it might create expansion loops).  For example, on
     // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
     EVT OVT = N->getOperand(0).getValueType();
-    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2);
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+                               TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+                               2);
 
     if (isTypeLegal(NVT)) {
       SDValue Parts[2];
@@ -312,8 +315,9 @@
   }
 
   SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
-                                 EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()),
-                                 &NewElts[0], NewElts.size());
+                               EVT::getVectorVT(*DAG.getContext(),
+                                                NewVT, NewElts.size()),
+                               &NewElts[0], NewElts.size());
 
   // Convert the new vector to the old vector type.
   return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
@@ -380,7 +384,8 @@
   DebugLoc dl = N->getDebugLoc();
 
   StoreSDNode *St = cast<StoreSDNode>(N);
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType());
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                     St->getValue().getValueType());
   SDValue Chain = St->getChain();
   SDValue Ptr = St->getBasePtr();
   int SVOffset = St->getSrcValueOffset();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b5f84c0..0e2bd02 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -34,7 +34,7 @@
 namespace {
 class VectorLegalizer {
   SelectionDAG& DAG;
-  TargetLowering& TLI;
+  const TargetLowering &TLI;
   bool Changed; // Keep track of whether anything changed
 
   /// LegalizedNodes - For nodes that are of legal width, and that have more
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8363c3a..7efeea1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -705,8 +705,9 @@
   // Store the new element.  This may be larger than the vector element type,
   // so use a truncating store.
   SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+  const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
   unsigned Alignment =
-    TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext()));
+    TLI.getTargetData()->getPrefTypeAlignment(VecType);
   Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
                             false, false, 0);
 
@@ -1419,7 +1420,8 @@
     ShOp = GetWidenedVector(ShOp);
     ShVT = ShOp.getValueType();
   }
-  EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(),
+  EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+                                   ShVT.getVectorElementType(),
                                    WidenVT.getVectorNumElements());
   if (ShVT != ShWidenVT)
     ShOp = ModifyToType(ShOp, ShWidenVT);
@@ -1493,7 +1495,8 @@
     unsigned NewNumElts = WidenSize / InSize;
     if (InVT.isVector()) {
       EVT InEltVT = InVT.getVectorElementType();
-      NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits());
+      NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT,
+                                WidenSize / InEltVT.getSizeInBits());
     } else {
       NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
     }
@@ -1617,7 +1620,8 @@
   SDValue RndOp = N->getOperand(3);
   SDValue SatOp = N->getOperand(4);
 
-  EVT      WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT      WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                              N->getValueType(0));
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
 
   EVT InVT = InOp.getValueType();
@@ -1791,7 +1795,8 @@
   EVT CondVT = Cond1.getValueType();
   if (CondVT.isVector()) {
     EVT CondEltVT = CondVT.getVectorElementType();
-    EVT CondWidenVT =  EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts);
+    EVT CondWidenVT =  EVT::getVectorVT(*DAG.getContext(),
+                                        CondEltVT, WidenNumElts);
     if (getTypeAction(CondVT) == WidenVector)
       Cond1 = GetWidenedVector(Cond1);
 
@@ -1859,7 +1864,8 @@
   SDValue InOp1 = N->getOperand(0);
   EVT InVT = InOp1.getValueType();
   assert(InVT.isVector() && "can not widen non vector type");
-  EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts);
+  EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+                                   InVT.getVectorElementType(), WidenNumElts);
   InOp1 = GetWidenedVector(InOp1);
   SDValue InOp2 = GetWidenedVector(N->getOperand(1));
 
@@ -2124,7 +2130,7 @@
   // The routines chops the vector into the largest vector loads with the same
   // element type or scalar loads and then recombines it to the widen vector
   // type.
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
   unsigned WidenWidth = WidenVT.getSizeInBits();
   EVT LdVT    = LD->getMemoryVT();
   DebugLoc dl = LD->getDebugLoc();
@@ -2167,7 +2173,7 @@
       } else
         return LdOp;
     } else {
-      unsigned NumElts = WidenWidth / LdWidth;
+      unsigned NumElts = WidenWidth / NewVTWidth;
       EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
       SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
       return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
diff --git a/lib/CodeGen/SelectionDAG/SDDbgValue.h b/lib/CodeGen/SelectionDAG/SDDbgValue.h
deleted file mode 100644
index 9e15fc9..0000000
--- a/lib/CodeGen/SelectionDAG/SDDbgValue.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//===-- llvm/CodeGen/SDDbgValue.h - SD dbg_value handling--------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SDDbgValue class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SDDBGVALUE_H
-#define LLVM_CODEGEN_SDDBGVALUE_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DebugLoc.h"
-
-namespace llvm {
-
-class MDNode;
-class SDNode;
-class Value;
-
-/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
-/// Either Const or Node is nonzero, but not both.
-/// We do not use SDValue here to avoid including its header.
-
-class SDDbgValue {
-  SDNode *Node;           // valid for non-constants
-  unsigned ResNo;         // valid for non-constants
-  Value *Const;           // valid for constants
-  MDNode *mdPtr;
-  uint64_t Offset;
-  DebugLoc DL;
-public:
-  // Constructor for non-constants.
-  SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl) :
-    Node(N), ResNo(R), Const(0), mdPtr(mdP), Offset(off), DL(dl) {}
-
-  // Constructor for constants.
-  SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl) : Node(0),
-    ResNo(0), Const(C), mdPtr(mdP), Offset(off), DL(dl) {}
-
-  // Returns the MDNode pointer.
-  MDNode *getMDPtr() { return mdPtr; }
-
-  // Returns the SDNode* (valid for non-constants only).
-  SDNode *getSDNode() { assert (!Const); return Node; }
-
-  // Returns the ResNo (valid for non-constants only).
-  unsigned getResNo() { assert (!Const); return ResNo; }
-
-  // Returns the Value* for a constant (invalid for non-constants).
-  Value *getConst() { assert (!Node); return Const; }
-
-  // Returns the offset.
-  uint64_t getOffset() { return Offset; }
-
-  // Returns the DebugLoc.
-  DebugLoc getDebugLoc() { return DL; }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 0000000..ac2d338
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,114 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H
+#define LLVM_CODEGEN_SDNODEDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/System/DataTypes.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+
+class SDDbgValue {
+public:
+  enum DbgValueKind {
+    SDNODE = 0,             // value is the result of an expression
+    CONST = 1,              // value is a constant
+    FRAMEIX = 2             // value is contents of a stack location
+  };
+private:
+  enum DbgValueKind kind;
+  union {
+    struct {
+      SDNode *Node;         // valid for expressions
+      unsigned ResNo;       // valid for expressions
+    } s;
+    const Value *Const;     // valid for constants
+    unsigned FrameIx;       // valid for stack objects
+  } u;
+  MDNode *mdPtr;
+  uint64_t Offset;
+  DebugLoc DL;
+  unsigned Order;
+  bool Invalid;
+public:
+  // Constructor for non-constants.
+  SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl,
+             unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O),
+                           Invalid(false) {
+    kind = SDNODE;
+    u.s.Node = N;
+    u.s.ResNo = R;
+  }
+
+  // Constructor for constants.
+  SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
+             unsigned O) : 
+    mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+    kind = CONST;
+    u.Const = C;
+  }
+
+  // Constructor for frame indices.
+  SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : 
+    mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+    kind = FRAMEIX;
+    u.FrameIx = FI;
+  }
+
+  // Returns the kind.
+  DbgValueKind getKind() { return kind; }
+
+  // Returns the MDNode pointer.
+  MDNode *getMDPtr() { return mdPtr; }
+
+  // Returns the SDNode* for a register ref
+  SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; }
+
+  // Returns the ResNo for a register ref
+  unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; }
+
+  // Returns the Value* for a constant
+  const Value *getConst() { assert (kind==CONST); return u.Const; }
+
+  // Returns the FrameIx for a stack object
+  unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
+
+  // Returns the offset.
+  uint64_t getOffset() { return Offset; }
+
+  // Returns the DebugLoc.
+  DebugLoc getDebugLoc() { return DL; }
+
+  // Returns the SDNodeOrder.  This is the order of the preceding node in the
+  // input.
+  unsigned getOrder() { return Order; }
+
+  // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+  // property. A SDDbgValue is invalid if the SDNode that produces the value is
+  // deleted.
+  void setIsInvalidated() { Invalid = true; }
+  bool isInvalidated() { return Invalid; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 3f1766d..da02850 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -17,18 +17,19 @@
 
 #define DEBUG_TYPE "pre-RA-sched"
 #include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <climits>
 using namespace llvm;
@@ -647,13 +648,14 @@
       if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
         --NumOps;  // Ignore the flag operand.
 
-      for (unsigned i = 2; i != NumOps;) {
+      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
         unsigned Flags =
           cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
-        unsigned NumVals = (Flags & 0xffff) >> 3;
+        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
 
         ++i; // Skip the ID value.
-        if ((Flags & 7) == 2 || (Flags & 7) == 6) {
+        if (InlineAsm::isRegDefKind(Flags) ||
+            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
           // Check for def of register or earlyclobber register.
           for (; NumVals; --NumVals, ++i) {
             unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 06e7b8c..c3e7854 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
+#include "SDNodeDbgValue.h"
 #include "ScheduleDAGSDNodes.h"
 #include "InstrEmitter.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -22,6 +23,7 @@
 #include "llvm/Target/TargetSubtarget.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
@@ -351,8 +353,8 @@
         const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                                OpSU->Latency, PhysReg);
         if (!isChain && !UnitLatencies) {
-          ComputeOperandLatency(OpSU, SU, (SDep &)dep);
-          ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+          ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep));
+          ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
         }
 
         SU->addPred(dep);
@@ -406,12 +408,79 @@
   }
 }
 
+namespace {
+  struct OrderSorter {
+    bool operator()(const std::pair<unsigned, MachineInstr*> &A,
+                    const std::pair<unsigned, MachineInstr*> &B) {
+      return A.first < B.first;
+    }
+  };
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule use to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+                           InstrEmitter &Emitter,
+                           DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM,
+                           DenseMap<SDValue, unsigned> &VRBaseMap,
+                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+                           SmallSet<unsigned, 8> &Seen) {
+  unsigned Order = DAG->GetOrdering(N);
+  if (!Order || !Seen.insert(Order))
+    return;
+
+  MachineBasicBlock *BB = Emitter.getBlock();
+  if (BB->empty() || BB->back().isPHI()) {
+    // Did not insert any instruction.
+    Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+    return;
+  }
+
+  Orders.push_back(std::make_pair(Order, &BB->back()));
+  if (!N->getHasDebugValue())
+    return;
+  // Opportunistically insert immediate dbg_value uses, i.e. those with source
+  // order number right after the N.
+  MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+  SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
+  for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+    if (DVs[i]->isInvalidated())
+      continue;
+    unsigned DVOrder = DVs[i]->getOrder();
+    if (DVOrder == ++Order) {
+      MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap, EM);
+      if (DbgMI) {
+        Orders.push_back(std::make_pair(DVOrder, DbgMI));
+        BB->insert(InsertPos, DbgMI);
+      }
+      DVs[i]->setIsInvalidated();
+    }
+  }
+}
+
+
 /// EmitSchedule - Emit the machine code in scheduled order.
 MachineBasicBlock *ScheduleDAGSDNodes::
 EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
   InstrEmitter Emitter(BB, InsertPos);
   DenseMap<SDValue, unsigned> VRBaseMap;
   DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+  SmallSet<unsigned, 8> Seen;
+  bool HasDbg = DAG->hasDebugValues();
+
+  // If this is the first BB, emit byval parameter dbg_value's.
+  if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+    for (; PDI != PDE; ++PDI) {
+      MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap, EM);
+      if (DbgMI)
+        BB->insert(BB->end(), DbgMI);
+    }
+  }
+
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
     SUnit *SU = Sequence[i];
     if (!SU) {
@@ -433,12 +502,83 @@
          N = N->getFlaggedNode())
       FlaggedNodes.push_back(N);
     while (!FlaggedNodes.empty()) {
+      SDNode *N = FlaggedNodes.back();
       Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
                        VRBaseMap, EM);
+      // Remember the source order of the inserted instruction.
+      if (HasDbg)
+        ProcessSourceNode(N, DAG, Emitter, EM, VRBaseMap, Orders, Seen);
       FlaggedNodes.pop_back();
     }
     Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
                      VRBaseMap, EM);
+    // Remember the source order of the inserted instruction.
+    if (HasDbg)
+      ProcessSourceNode(SU->getNode(), DAG, Emitter, EM, VRBaseMap, Orders,
+                        Seen);
+  }
+
+  // Insert all the dbg_values which have not already been inserted in source
+  // order sequence.
+  if (HasDbg) {
+    MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin();
+    while (BBBegin != BB->end() && BBBegin->isPHI())
+      ++BBBegin;
+
+    // Sort the source order instructions and use the order to insert debug
+    // values.
+    std::sort(Orders.begin(), Orders.end(), OrderSorter());
+
+    SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+    SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+    // Now emit the rest according to source order.
+    unsigned LastOrder = 0;
+    MachineInstr *LastMI = 0;
+    for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+      unsigned Order = Orders[i].first;
+      MachineInstr *MI = Orders[i].second;
+      // Insert all SDDbgValue's whose order(s) are before "Order".
+      if (!MI)
+        continue;
+      MachineBasicBlock *MIBB = MI->getParent();
+#ifndef NDEBUG
+      unsigned LastDIOrder = 0;
+#endif
+      for (; DI != DE &&
+             (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+#ifndef NDEBUG
+        assert((*DI)->getOrder() >= LastDIOrder &&
+               "SDDbgValue nodes must be in source order!");
+        LastDIOrder = (*DI)->getOrder();
+#endif
+        if ((*DI)->isInvalidated())
+          continue;
+        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap, EM);
+        if (DbgMI) {
+          if (!LastOrder)
+            // Insert to start of the BB (after PHIs).
+            BB->insert(BBBegin, DbgMI);
+          else {
+            MachineBasicBlock::iterator Pos = MI;
+            MIBB->insert(llvm::next(Pos), DbgMI);
+          }
+        }
+      }
+      LastOrder = Order;
+      LastMI = MI;
+    }
+    // Add trailing DbgValue's before the terminator. FIXME: May want to add
+    // some of them before one or more conditional branches?
+    while (DI != DE) {
+      MachineBasicBlock *InsertBB = Emitter.getBlock();
+      MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
+      if (!(*DI)->isInvalidated()) {
+        MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap, EM);
+        if (DbgMI)
+          InsertBB->insert(Pos, DbgMI);
+      }
+      ++DI;
+    }
   }
 
   BB = Emitter.getBlock();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 6b829b6..43d8333 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -59,7 +59,8 @@
       if (isa<JumpTableSDNode>(Node))      return true;
       if (isa<ExternalSymbolSDNode>(Node)) return true;
       if (isa<BlockAddressSDNode>(Node))   return true;
-      if (Node->getOpcode() == ISD::EntryToken) return true;
+      if (Node->getOpcode() == ISD::EntryToken ||
+          isa<MDNodeSDNode>(Node)) return true;
       return false;
     }
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 746d4e2..c479f65 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "SDNodeOrdering.h"
+#include "SDNodeDbgValue.h"
 #include "llvm/Constants.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Function.h"
@@ -303,10 +304,6 @@
   return Result;
 }
 
-const TargetMachine &SelectionDAG::getTarget() const {
-  return MF->getTarget();
-}
-
 //===----------------------------------------------------------------------===//
 //                           SDNode Profile Support
 //===----------------------------------------------------------------------===//
@@ -596,6 +593,11 @@
 
   // Remove the ordering of this node.
   Ordering->remove(N);
+
+  // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
+  SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N);
+  for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
+    DbgVals[i]->setIsInvalidated();
 }
 
 /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
@@ -786,26 +788,25 @@
 }
 
 // EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
-  : TLI(tli), FLI(fli), DW(0),
-    EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(),
-              getVTList(MVT::Other)),
+SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli)
+  : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli),
+    EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
     Root(getEntryNode()), Ordering(0) {
   AllNodes.push_back(&EntryNode);
   Ordering = new SDNodeOrdering();
+  DbgInfo = new SDDbgInfo();
 }
 
-void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
-                        DwarfWriter *dw) {
+void SelectionDAG::init(MachineFunction &mf) {
   MF = &mf;
-  MMI = mmi;
-  DW = dw;
   Context = &mf.getFunction()->getContext();
 }
 
 SelectionDAG::~SelectionDAG() {
   allnodes_clear();
   delete Ordering;
+  DbgInfo->clear();
+  delete DbgInfo;
 }
 
 void SelectionDAG::allnodes_clear() {
@@ -833,6 +834,9 @@
   Root = getEntryNode();
   delete Ordering;
   Ordering = new SDNodeOrdering();
+  DbgInfo->clear();
+  delete DbgInfo;
+  DbgInfo = new SDDbgInfo();
 }
 
 SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
@@ -898,8 +902,7 @@
       return SDValue(N, 0);
 
   if (!N) {
-    N = NodeAllocator.Allocate<ConstantSDNode>();
-    new (N) ConstantSDNode(isT, &Val, EltVT);
+    N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT);
     CSEMap.InsertNode(N, IP);
     AllNodes.push_back(N);
   }
@@ -908,8 +911,7 @@
   if (VT.isVector()) {
     SmallVector<SDValue, 8> Ops;
     Ops.assign(VT.getVectorNumElements(), Result);
-    Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
-                     VT, &Ops[0], Ops.size());
+    Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
   }
   return Result;
 }
@@ -942,8 +944,7 @@
       return SDValue(N, 0);
 
   if (!N) {
-    N = NodeAllocator.Allocate<ConstantFPSDNode>();
-    new (N) ConstantFPSDNode(isTarget, &V, EltVT);
+    N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT);
     CSEMap.InsertNode(N, IP);
     AllNodes.push_back(N);
   }
@@ -953,8 +954,7 @@
     SmallVector<SDValue, 8> Ops;
     Ops.assign(VT.getVectorNumElements(), Result);
     // FIXME DebugLoc info might be appropriate here
-    Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
-                     VT, &Ops[0], Ops.size());
+    Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
   }
   return Result;
 }
@@ -1002,8 +1002,8 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
-  new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags);
+  SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT,
+                                                      Offset, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1018,8 +1018,7 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>();
-  new (N) FrameIndexSDNode(FI, VT, isTarget);
+  SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1038,14 +1037,14 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
-  new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags);
+  SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
+                                                  TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT,
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
                                       unsigned Alignment, int Offset,
                                       bool isTarget,
                                       unsigned char TargetFlags) {
@@ -1064,8 +1063,8 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
-  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
+  SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+                                                     Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1091,8 +1090,8 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
-  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
+  SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+                                                     Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1106,8 +1105,7 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
-  new (N) BasicBlockSDNode(MBB);
+  SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1122,8 +1120,7 @@
     ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
 
   if (N) return SDValue(N, 0);
-  N = NodeAllocator.Allocate<VTSDNode>();
-  new (N) VTSDNode(VT);
+  N = new (NodeAllocator) VTSDNode(VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
@@ -1131,8 +1128,7 @@
 SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
   SDNode *&N = ExternalSymbols[Sym];
   if (N) return SDValue(N, 0);
-  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
-  new (N) ExternalSymbolSDNode(false, Sym, 0, VT);
+  N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
@@ -1143,8 +1139,7 @@
     TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
                                                                TargetFlags)];
   if (N) return SDValue(N, 0);
-  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
-  new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+  N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
@@ -1154,8 +1149,7 @@
     CondCodeNodes.resize(Cond+1);
 
   if (CondCodeNodes[Cond] == 0) {
-    CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>();
-    new (N) CondCodeSDNode(Cond);
+    CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
     CondCodeNodes[Cond] = N;
     AllNodes.push_back(N);
   }
@@ -1260,8 +1254,8 @@
   int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
   memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
 
-  ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
-  new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+  ShuffleVectorSDNode *N =
+    new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1284,8 +1278,8 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
-  new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
+  CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5,
+                                                           Code);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1299,32 +1293,29 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
-  new (N) RegisterSDNode(RegNo, VT);
+  SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
-                               SDValue Root,
-                               unsigned LabelID) {
+SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
   FoldingSetNodeID ID;
   SDValue Ops[] = { Root };
-  AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
-  ID.AddInteger(LabelID);
+  AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1);
+  ID.AddPointer(Label);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
-
-  SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
-  new (N) LabelSDNode(Opcode, dl, Root, LabelID);
+  
+  SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
+
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
                                       bool isTarget,
                                       unsigned char TargetFlags) {
   unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
@@ -1337,8 +1328,7 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>();
-  new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
+  SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1356,13 +1346,29 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
-  new (N) SrcValueSDNode(V);
+  SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
 
+/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(MD);
+  
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  
+  SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+
 /// getShiftAmountOperand - Return the specified value casted to
 /// the target's desired shift amount type.
 SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
@@ -1911,7 +1917,8 @@
     // Output known-0 bits are known if clear or set in both the low clear bits
     // common to both LHS & RHS.  For example, 8+(X<<3) is known to have the
     // low 3 bits clear.
-    APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
+    APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+                                       BitWidth - Mask.countLeadingZeros());
     ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
     assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
     unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
@@ -2260,10 +2267,9 @@
   GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
   if (!GA) return false;
   if (GA->getOffset() != 0) return false;
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
   if (!GV) return false;
-  MachineModuleInfo *MMI = getMachineModuleInfo();
-  return MMI && MMI->hasDebugInfo();
+  return MF->getMMI().hasDebugInfo();
 }
 
 
@@ -2306,8 +2312,7 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<SDNode>();
-  new (N) SDNode(Opcode, DL, getVTList(VT));
+  SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT));
   CSEMap.InsertNode(N, IP);
 
   AllNodes.push_back(N);
@@ -2322,22 +2327,20 @@
   // Constant fold unary operations with an integer constant operand.
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
     const APInt &Val = C->getAPIntValue();
-    unsigned BitWidth = VT.getSizeInBits();
     switch (Opcode) {
     default: break;
     case ISD::SIGN_EXTEND:
-      return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT);
+      return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT);
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::TRUNCATE:
-      return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT);
+      return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT);
     case ISD::UINT_TO_FP:
     case ISD::SINT_TO_FP: {
       const uint64_t zero[] = {0, 0};
-      // No compile time operations on this type.
-      if (VT==MVT::ppcf128)
-        break;
-      APFloat apf = APFloat(APInt(BitWidth, 2, zero));
+      // No compile time operations on ppcf128.
+      if (VT == MVT::ppcf128) break;
+      APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
       (void)apf.convertFromAPInt(Val,
                                  Opcode==ISD::SINT_TO_FP,
                                  APFloat::rmNearestTiesToEven);
@@ -2537,12 +2540,10 @@
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = NodeAllocator.Allocate<UnarySDNode>();
-    new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+    N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<UnarySDNode>();
-    new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+    N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
   }
 
   AllNodes.push_back(N);
@@ -2970,12 +2971,10 @@
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = NodeAllocator.Allocate<BinarySDNode>();
-    new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+    N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<BinarySDNode>();
-    new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+    N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
   }
 
   AllNodes.push_back(N);
@@ -3048,12 +3047,10 @@
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = NodeAllocator.Allocate<TernarySDNode>();
-    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+    N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<TernarySDNode>();
-    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+    N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
   }
 
   AllNodes.push_back(N);
@@ -3104,6 +3101,8 @@
 /// operand.
 static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
                               DebugLoc dl) {
+  assert(Value.getOpcode() != ISD::UNDEF);
+
   unsigned NumBits = VT.getScalarType().getSizeInBits();
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
     APInt Val = APInt(NumBits, C->getZExtValue() & 255);
@@ -3142,11 +3141,17 @@
   if (Str.empty()) {
     if (VT.isInteger())
       return DAG.getConstant(0, VT);
-    unsigned NumElts = VT.getVectorNumElements();
-    MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
-                       DAG.getConstant(0,
-                       EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts)));
+    else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+             VT.getSimpleVT().SimpleTy == MVT::f64)
+      return DAG.getConstantFP(0.0, VT);
+    else if (VT.isVector()) {
+      unsigned NumElts = VT.getVectorNumElements();
+      MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                         DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+                                                             EltVT, NumElts)));
+    } else
+      llvm_unreachable("Expected type!");
   }
 
   assert(!VT.isVector() && "Can't handle vector type here!");
@@ -3187,58 +3192,43 @@
   if (!G)
     return false;
 
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
   if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
     return true;
 
   return false;
 }
 
-/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
-/// to replace the memset / memcpy is below the threshold. It also returns the
-/// types of the sequence of memory ops to perform memset / memcpy.
-static
-bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
-                              SDValue Dst, SDValue Src,
-                              unsigned Limit, uint64_t Size, unsigned &Align,
-                              std::string &Str, bool &isSrcStr,
-                              SelectionDAG &DAG,
-                              const TargetLowering &TLI) {
-  isSrcStr = isMemSrcFromString(Src, Str);
-  bool isSrcConst = isa<ConstantSDNode>(Src);
-  EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
-  bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
-  if (VT != MVT::Other) {
-    const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
-    // If source is a string constant, this will require an unaligned load.
-    if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
-      if (Dst.getOpcode() != ISD::FrameIndex) {
-        // Can't change destination alignment. It requires a unaligned store.
-        if (AllowUnalign)
-          VT = MVT::Other;
-      } else {
-        int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
-        MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
-        if (MFI->isFixedObjectIndex(FI)) {
-          // Can't change destination alignment. It requires a unaligned store.
-          if (AllowUnalign)
-            VT = MVT::Other;
-        } else {
-          // Give the stack frame object a larger alignment if needed.
-          if (MFI->getObjectAlignment(FI) < NewAlign)
-            MFI->setObjectAlignment(FI, NewAlign);
-          Align = NewAlign;
-        }
-      }
-    }
-  }
+/// FindOptimalMemOpLowering - Determines the optimial series memory ops
+/// to replace the memset / memcpy. Return true if the number of memory ops
+/// is below the threshold. It returns the types of the sequence of
+/// memory ops to perform memset / memcpy by reference.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                     unsigned Limit, uint64_t Size,
+                                     unsigned DstAlign, unsigned SrcAlign,
+                                     bool NonScalarIntSafe,
+                                     bool MemcpyStrSrc,
+                                     SelectionDAG &DAG,
+                                     const TargetLowering &TLI) {
+  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+         "Expecting memcpy / memset source to meet alignment requirement!");
+  // If 'SrcAlign' is zero, that means the memory operation does not need load
+  // the value, i.e. memset or memcpy from constant string. Otherwise, it's
+  // the inferred alignment of the source. 'DstAlign', on the other hand, is the
+  // specified alignment of the memory operation. If it is zero, that means
+  // it's possible to change the alignment of the destination. 'MemcpyStrSrc'
+  // indicates whether the memcpy source is constant so it does not need to be
+  // loaded.
+  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
+                                   NonScalarIntSafe, MemcpyStrSrc,
+                                   DAG.getMachineFunction());
 
   if (VT == MVT::Other) {
-    if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
-      VT = MVT::i64;
+    if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
+        TLI.allowsUnalignedMemoryAccesses(VT)) {
+      VT = TLI.getPointerTy();
     } else {
-      switch (Align & 7) {
+      switch (DstAlign & 7) {
       case 0:  VT = MVT::i64; break;
       case 4:  VT = MVT::i32; break;
       case 2:  VT = MVT::i16; break;
@@ -3260,7 +3250,7 @@
     unsigned VTSize = VT.getSizeInBits() / 8;
     while (VTSize > Size) {
       // For now, only use non-vector load / store's for the left-over pieces.
-      if (VT.isVector()) {
+      if (VT.isVector() || VT.isFloatingPoint()) {
         VT = MVT::i64;
         while (!TLI.isTypeLegal(VT))
           VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
@@ -3283,28 +3273,51 @@
 }
 
 static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
-                                         SDValue Chain, SDValue Dst,
-                                         SDValue Src, uint64_t Size,
-                                         unsigned Align, bool AlwaysInline,
-                                         const Value *DstSV, uint64_t DstSVOff,
-                                         const Value *SrcSV, uint64_t SrcSVOff){
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+                                       SDValue Chain, SDValue Dst,
+                                       SDValue Src, uint64_t Size,
+                                       unsigned Align, bool isVol,
+                                       bool AlwaysInline,
+                                       const Value *DstSV, uint64_t DstSVOff,
+                                       const Value *SrcSV, uint64_t SrcSVOff) {
+  // Turn a memcpy of undef to nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
 
   // Expand memcpy to a series of load and store ops if the size operand falls
   // below a certain threshold.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+  if (Align > SrcAlign)
+    SrcAlign = Align;
+  std::string Str;
+  bool CopyFromStr = isMemSrcFromString(Src, Str);
+  bool isZeroStr = CopyFromStr && Str.empty();
   uint64_t Limit = -1ULL;
   if (!AlwaysInline)
     Limit = TLI.getMaxStoresPerMemcpy();
-  unsigned DstAlign = Align;  // Destination alignment can change.
-  std::string Str;
-  bool CopyFromStr;
-  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
-                                Str, CopyFromStr, DAG, TLI))
+  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+                                (DstAlignCanChange ? 0 : Align),
+                                (isZeroStr ? 0 : SrcAlign),
+                                true, CopyFromStr, DAG, TLI))
     return SDValue();
 
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
 
-  bool isZeroStr = CopyFromStr && Str.empty();
   SmallVector<SDValue, 8> OutChains;
   unsigned NumMemOps = MemOps.size();
   uint64_t SrcOff = 0, DstOff = 0;
@@ -3313,16 +3326,17 @@
     unsigned VTSize = VT.getSizeInBits() / 8;
     SDValue Value, Store;
 
-    if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+    if (CopyFromStr &&
+        (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
       // It's unlikely a store of a vector immediate can be done in a single
       // instruction. It would require a load from a constantpool first.
-      // We also handle store a vector with all zero's.
+      // We only handle zero vectors here.
       // FIXME: Handle other cases where store of vector immediate is done in
       // a single instruction.
       Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
       Store = DAG.getStore(Chain, dl, Value,
                            getMemBasePlusOffset(Dst, DstOff, DAG),
-                           DstSV, DstSVOff + DstOff, false, false, DstAlign);
+                           DstSV, DstSVOff + DstOff, isVol, false, Align);
     } else {
       // The type might not be legal for the target.  This should only happen
       // if the type is smaller than a legal type, as on PPC, so the right
@@ -3333,11 +3347,12 @@
       assert(NVT.bitsGE(VT));
       Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
                              getMemBasePlusOffset(Src, SrcOff, DAG),
-                             SrcSV, SrcSVOff + SrcOff, VT, false, false, Align);
+                             SrcSV, SrcSVOff + SrcOff, VT, isVol, false,
+                             MinAlign(SrcAlign, SrcOff));
       Store = DAG.getTruncStore(Chain, dl, Value,
                                 getMemBasePlusOffset(Dst, DstOff, DAG),
-                                DstSV, DstSVOff + DstOff, VT, false, false,
-                                DstAlign);
+                                DstSV, DstSVOff + DstOff, VT, isVol, false,
+                                Align);
     }
     OutChains.push_back(Store);
     SrcOff += VTSize;
@@ -3349,28 +3364,49 @@
 }
 
 static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
-                                          SDValue Chain, SDValue Dst,
-                                          SDValue Src, uint64_t Size,
-                                          unsigned Align, bool AlwaysInline,
-                                          const Value *DstSV, uint64_t DstSVOff,
-                                          const Value *SrcSV, uint64_t SrcSVOff){
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+                                        SDValue Chain, SDValue Dst,
+                                        SDValue Src, uint64_t Size,
+                                        unsigned Align,  bool isVol,
+                                        bool AlwaysInline,
+                                        const Value *DstSV, uint64_t DstSVOff,
+                                        const Value *SrcSV, uint64_t SrcSVOff) {
+  // Turn a memmove of undef to nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
 
   // Expand memmove to a series of load and store ops if the size operand falls
   // below a certain threshold.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
   uint64_t Limit = -1ULL;
   if (!AlwaysInline)
     Limit = TLI.getMaxStoresPerMemmove();
-  unsigned DstAlign = Align;  // Destination alignment can change.
-  std::string Str;
-  bool CopyFromStr;
-  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
-                                Str, CopyFromStr, DAG, TLI))
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+  if (Align > SrcAlign)
+    SrcAlign = Align;
+
+  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+                                (DstAlignCanChange ? 0 : Align),
+                                SrcAlign, true, false, DAG, TLI))
     return SDValue();
 
-  uint64_t SrcOff = 0, DstOff = 0;
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
 
+  uint64_t SrcOff = 0, DstOff = 0;
   SmallVector<SDValue, 8> LoadValues;
   SmallVector<SDValue, 8> LoadChains;
   SmallVector<SDValue, 8> OutChains;
@@ -3382,7 +3418,7 @@
 
     Value = DAG.getLoad(VT, dl, Chain,
                         getMemBasePlusOffset(Src, SrcOff, DAG),
-                        SrcSV, SrcSVOff + SrcOff, false, false, Align);
+                        SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign);
     LoadValues.push_back(Value);
     LoadChains.push_back(Value.getValue(1));
     SrcOff += VTSize;
@@ -3397,7 +3433,7 @@
 
     Store = DAG.getStore(Chain, dl, LoadValues[i],
                          getMemBasePlusOffset(Dst, DstOff, DAG),
-                         DstSV, DstSVOff + DstOff, false, false, DstAlign);
+                         DstSV, DstSVOff + DstOff, isVol, false, Align);
     OutChains.push_back(Store);
     DstOff += VTSize;
   }
@@ -3407,24 +3443,43 @@
 }
 
 static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
-                                 SDValue Chain, SDValue Dst,
-                                 SDValue Src, uint64_t Size,
-                                 unsigned Align,
-                                 const Value *DstSV, uint64_t DstSVOff) {
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+                               SDValue Chain, SDValue Dst,
+                               SDValue Src, uint64_t Size,
+                               unsigned Align, bool isVol,
+                               const Value *DstSV, uint64_t DstSVOff) {
+  // Turn a memset of undef to nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
 
   // Expand memset to a series of load/store ops if the size operand
   // falls below a certain threshold.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
-  std::string Str;
-  bool CopyFromStr;
-  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
-                                Size, Align, Str, CopyFromStr, DAG, TLI))
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  bool NonScalarIntSafe =
+    isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
+                                Size, (DstAlignCanChange ? 0 : Align), 0,
+                                NonScalarIntSafe, false, DAG, TLI))
     return SDValue();
 
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
   SmallVector<SDValue, 8> OutChains;
   uint64_t DstOff = 0;
-
   unsigned NumMemOps = MemOps.size();
   for (unsigned i = 0; i < NumMemOps; i++) {
     EVT VT = MemOps[i];
@@ -3432,7 +3487,7 @@
     SDValue Value = getMemsetValue(Src, VT, DAG, dl);
     SDValue Store = DAG.getStore(Chain, dl, Value,
                                  getMemBasePlusOffset(Dst, DstOff, DAG),
-                                 DstSV, DstSVOff + DstOff, false, false, 0);
+                                 DstSV, DstSVOff + DstOff, isVol, false, 0);
     OutChains.push_back(Store);
     DstOff += VTSize;
   }
@@ -3443,7 +3498,7 @@
 
 SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
                                 SDValue Src, SDValue Size,
-                                unsigned Align, bool AlwaysInline,
+                                unsigned Align, bool isVol, bool AlwaysInline,
                                 const Value *DstSV, uint64_t DstSVOff,
                                 const Value *SrcSV, uint64_t SrcSVOff) {
 
@@ -3455,10 +3510,9 @@
     if (ConstantSize->isNullValue())
       return Chain;
 
-    SDValue Result =
-      getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
-                              ConstantSize->getZExtValue(),
-                              Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+    SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                                             ConstantSize->getZExtValue(),Align,
+                                isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
     if (Result.getNode())
       return Result;
   }
@@ -3467,7 +3521,7 @@
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
     TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
-                                AlwaysInline,
+                                isVol, AlwaysInline,
                                 DstSV, DstSVOff, SrcSV, SrcSVOff);
   if (Result.getNode())
     return Result;
@@ -3477,10 +3531,16 @@
   if (AlwaysInline) {
     assert(ConstantSize && "AlwaysInline requires a constant size!");
     return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
-                                   ConstantSize->getZExtValue(), Align, true,
-                                   DstSV, DstSVOff, SrcSV, SrcSVOff);
+                                   ConstantSize->getZExtValue(), Align, isVol,
+                                   true, DstSV, DstSVOff, SrcSV, SrcSVOff);
   }
 
+  // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
+  // memcpy is not guaranteed to be safe. libc memcpys aren't required to
+  // respect volatile, so they may do things like read or write memory
+  // beyond the given memory regions. But fixing this isn't easy, and most
+  // people don't care.
+
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
@@ -3502,7 +3562,7 @@
 
 SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
                                  SDValue Src, SDValue Size,
-                                 unsigned Align,
+                                 unsigned Align, bool isVol,
                                  const Value *DstSV, uint64_t DstSVOff,
                                  const Value *SrcSV, uint64_t SrcSVOff) {
 
@@ -3516,8 +3576,8 @@
 
     SDValue Result =
       getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
-                               ConstantSize->getZExtValue(),
-                               Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+                               ConstantSize->getZExtValue(), Align, isVol,
+                               false, DstSV, DstSVOff, SrcSV, SrcSVOff);
     if (Result.getNode())
       return Result;
   }
@@ -3525,11 +3585,14 @@
   // Then check to see if we should lower the memmove with target-specific
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
-    TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align,
+    TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
                                  DstSV, DstSVOff, SrcSV, SrcSVOff);
   if (Result.getNode())
     return Result;
 
+  // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+  // not be safe.  See memcpy above for more details.
+
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
@@ -3551,7 +3614,7 @@
 
 SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
                                 SDValue Src, SDValue Size,
-                                unsigned Align,
+                                unsigned Align, bool isVol,
                                 const Value *DstSV, uint64_t DstSVOff) {
 
   // Check to see if we should lower the memset to stores first.
@@ -3564,7 +3627,8 @@
 
     SDValue Result =
       getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
-                      Align, DstSV, DstSVOff);
+                      Align, isVol, DstSV, DstSVOff);
+
     if (Result.getNode())
       return Result;
   }
@@ -3572,12 +3636,12 @@
   // Then check to see if we should lower the memset with target-specific
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
-    TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align,
+    TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
                                 DstSV, DstSVOff);
   if (Result.getNode())
     return Result;
 
-  // Emit a library call.
+  // Emit a library call.  
   const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
@@ -3654,8 +3718,8 @@
     cast<AtomicSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
-  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+  SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+                                               Ptr, Cmp, Swp, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -3717,8 +3781,8 @@
     cast<AtomicSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
-  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO);
+  SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+                                               Ptr, Val, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -3796,12 +3860,12 @@
       return SDValue(E, 0);
     }
 
-    N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
-    new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+    N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+                                               MemVT, MMO);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
-    new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+    N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+                                               MemVT, MMO);
   }
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -3874,8 +3938,8 @@
     cast<LoadSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
-  new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO);
+  SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType,
+                                             MemVT, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -3956,8 +4020,8 @@
     cast<StoreSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
-  new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO);
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              false, VT, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -4020,8 +4084,8 @@
     cast<StoreSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
-  new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO);
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              true, SVT, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -4043,10 +4107,10 @@
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
-  new (N) StoreSDNode(Ops, dl, VTs, AM,
-                      ST->isTruncatingStore(), ST->getMemoryVT(),
-                      ST->getMemOperand());
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM,
+                                              ST->isTruncatingStore(),
+                                              ST->getMemoryVT(),
+                                              ST->getMemOperand());
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -4117,12 +4181,10 @@
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = NodeAllocator.Allocate<SDNode>();
-    new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = NodeAllocator.Allocate<SDNode>();
-    new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
   }
 
   AllNodes.push_back(N);
@@ -4185,32 +4247,26 @@
       return SDValue(E, 0);
 
     if (NumOps == 1) {
-      N = NodeAllocator.Allocate<UnarySDNode>();
-      new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+      N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
     } else if (NumOps == 2) {
-      N = NodeAllocator.Allocate<BinarySDNode>();
-      new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+      N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
     } else if (NumOps == 3) {
-      N = NodeAllocator.Allocate<TernarySDNode>();
-      new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+      N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+                                            Ops[2]);
     } else {
-      N = NodeAllocator.Allocate<SDNode>();
-      new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+      N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
     }
     CSEMap.InsertNode(N, IP);
   } else {
     if (NumOps == 1) {
-      N = NodeAllocator.Allocate<UnarySDNode>();
-      new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+      N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
     } else if (NumOps == 2) {
-      N = NodeAllocator.Allocate<BinarySDNode>();
-      new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+      N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
     } else if (NumOps == 3) {
-      N = NodeAllocator.Allocate<TernarySDNode>();
-      new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+      N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+                                            Ops[2]);
     } else {
-      N = NodeAllocator.Allocate<SDNode>();
-      new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+      N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
     }
   }
   AllNodes.push_back(N);
@@ -4635,7 +4691,7 @@
         // remainder of the current SelectionDAG iteration, so we can allocate
         // the operands directly out of a pool with no recycling metadata.
         MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
-                        Ops, NumOps);
+                         Ops, NumOps);
       else
         MN->InitOperands(MN->LocalOperands, Ops, NumOps);
       MN->OperandsNeedDelete = false;
@@ -4809,8 +4865,7 @@
   }
 
   // Allocate a new MachineSDNode.
-  N = NodeAllocator.Allocate<MachineSDNode>();
-  new (N) MachineSDNode(~Opcode, DL, VTs);
+  N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs);
 
   // Initialize the operands list.
   if (NumOps > array_lengthof(N->LocalOperands))
@@ -4869,6 +4924,26 @@
   return NULL;
 }
 
+/// getDbgValue - Creates a SDDbgValue node.
+///
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
+                          DebugLoc DL, unsigned O) {
+  return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
+                          DebugLoc DL, unsigned O) {
+  return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
+                          DebugLoc DL, unsigned O) {
+  return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
+}
+
 namespace {
 
 /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
@@ -5264,6 +5339,13 @@
   return Ordering->getOrder(SD);
 }
 
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+  DbgInfo->add(DB, SD, isParameter);
+  if (SD)
+    SD->setHasDebugValue(true);
+}
 
 //===----------------------------------------------------------------------===//
 //                              SDNode Class
@@ -5275,9 +5357,8 @@
 
 GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
                                          EVT VT, int64_t o, unsigned char TF)
-  : SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)),
-    Offset(o), TargetFlags(TF) {
-  TheGlobal = const_cast<GlobalValue*>(GA);
+  : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+  TheGlobal = GA;
 }
 
 MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
@@ -5497,6 +5578,7 @@
   case ISD::PCMARKER:      return "PCMarker";
   case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
   case ISD::SRCVALUE:      return "SrcValue";
+  case ISD::MDNODE_SDNODE: return "MDNode";
   case ISD::EntryToken:    return "EntryToken";
   case ISD::TokenFactor:   return "TokenFactor";
   case ISD::AssertSext:    return "AssertSext";
@@ -5639,6 +5721,8 @@
   case ISD::FP_TO_SINT:  return "fp_to_sint";
   case ISD::FP_TO_UINT:  return "fp_to_uint";
   case ISD::BIT_CONVERT: return "bit_convert";
+  case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+  case ISD::FP32_TO_FP16: return "fp32_to_fp16";
 
   case ISD::CONVERT_RNDSAT: {
     switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
@@ -5863,6 +5947,11 @@
       OS << "<" << M->getValue() << ">";
     else
       OS << "<null>";
+  } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+    if (MD->getMD())
+      OS << "<" << MD->getMD() << ">";
+    else
+      OS << "<null>";
   } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
     OS << ":" << N->getVT().getEVTString();
   }
@@ -5911,7 +6000,7 @@
   if (G)
     if (unsigned Order = G->GetOrdering(this))
       OS << " [ORD=" << Order << ']';
-  
+
   if (getNodeId() != -1)
     OS << " [ID=" << getNodeId() << ']';
 }
@@ -6078,8 +6167,8 @@
       return true;
   }
 
-  GlobalValue *GV1 = NULL;
-  GlobalValue *GV2 = NULL;
+  const GlobalValue *GV1 = NULL;
+  const GlobalValue *GV2 = NULL;
   int64_t Offset1 = 0;
   int64_t Offset2 = 0;
   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
@@ -6094,10 +6183,22 @@
 /// it cannot be inferred.
 unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
   // If this is a GlobalAddress + cst, return the alignment.
-  GlobalValue *GV;
+  const GlobalValue *GV;
   int64_t GVOffset = 0;
-  if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
-    return MinAlign(GV->getAlignment(), GVOffset);
+  if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+    // If GV has specified alignment, then use it. Otherwise, use the preferred
+    // alignment.
+    unsigned Align = GV->getAlignment();
+    if (!Align) {
+      if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+        if (GVar->hasInitializer()) {
+          const TargetData *TD = TLI.getTargetData();
+          Align = TD->getPreferredAlignment(GVar);
+        }
+      }
+    }
+    return MinAlign(Align, GVOffset);
+  }
 
   // If this is a direct reference to a stack slot, use information about the
   // stack slot's alignment.
@@ -6251,8 +6352,8 @@
     if (OpVal.getOpcode() == ISD::UNDEF)
       SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
     else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
-      SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
-                     zextOrTrunc(sz) << BitPos);
+      SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+                    zextOrTrunc(sz) << BitPos;
     else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
       SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
      else
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 05be9a1..ade3836 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "isel"
+#include "SDNodeDbgValue.h"
 #include "SelectionDAGBuilder.h"
 #include "FunctionLoweringInfo.h"
 #include "llvm/ADT/BitVector.h"
@@ -27,7 +28,9 @@
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/GCMetadata.h"
@@ -39,7 +42,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
@@ -168,7 +170,7 @@
     /// AddInlineAsmOperands - Add this value to the specified inlineasm node
     /// operand list.  This adds the code marker, matching input operand index
     /// (if applicable), and includes the number of values added into it.
-    void AddInlineAsmOperands(unsigned Code,
+    void AddInlineAsmOperands(unsigned Kind,
                               bool HasMatching, unsigned MatchingIdx,
                               SelectionDAG &DAG,
                               std::vector<SDValue> &Ops) const;
@@ -533,7 +535,7 @@
   TD = DAG.getTarget().getTargetData();
 }
 
-/// clear - Clear out the curret SelectionDAG and the associated
+/// clear - Clear out the current SelectionDAG and the associated
 /// state and prepare this SelectionDAGBuilder object to be used
 /// for a new block. This doesn't clear out information about
 /// additional blocks that are needed to complete switch lowering
@@ -545,7 +547,7 @@
   PendingExports.clear();
   EdgeMapping.clear();
   DAG.clear();
-  CurDebugLoc = DebugLoc::getUnknownLoc();
+  CurDebugLoc = DebugLoc();
   HasTailCall = false;
 }
 
@@ -612,11 +614,26 @@
     AssignOrderingToNode(Node->getOperand(I).getNode());
 }
 
-void SelectionDAGBuilder::visit(Instruction &I) {
+void SelectionDAGBuilder::visit(const Instruction &I) {
+  // Set up outgoing PHI node register values before emitting the terminator.
+  if (isa<TerminatorInst>(&I))
+    HandlePHINodesInSuccessorBlocks(I.getParent());
+
+  CurDebugLoc = I.getDebugLoc();
+
   visit(I.getOpcode(), I);
+
+  if (!isa<TerminatorInst>(&I) && !HasTailCall)
+    CopyToExportRegsIfNeeded(&I);
+
+  CurDebugLoc = DebugLoc();
 }
 
-void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
   // Note: this doesn't use InstVisitor, because it has to work with
   // ConstantExpr's in addition to instructions.
   switch (Opcode) {
@@ -638,28 +655,28 @@
   SDValue &N = NodeMap[V];
   if (N.getNode()) return N;
 
-  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+  if (const Constant *C = dyn_cast<Constant>(V)) {
     EVT VT = TLI.getValueType(V->getType(), true);
 
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
       return N = DAG.getConstant(*CI, VT);
 
-    if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
       return N = DAG.getGlobalAddress(GV, VT);
 
     if (isa<ConstantPointerNull>(C))
       return N = DAG.getConstant(0, TLI.getPointerTy());
 
-    if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
       return N = DAG.getConstantFP(*CFP, VT);
 
     if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
       return N = DAG.getUNDEF(VT);
 
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
       visit(CE->getOpcode(), *CE);
       SDValue N1 = NodeMap[V];
-      assert(N1.getNode() && "visit didn't populate the ValueMap!");
+      assert(N1.getNode() && "visit didn't populate the NodeMap!");
       return N1;
     }
 
@@ -704,7 +721,7 @@
                                 getCurDebugLoc());
     }
 
-    if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
       return DAG.getBlockAddress(BA, VT);
 
     const VectorType *VecTy = cast<VectorType>(V->getType());
@@ -713,7 +730,7 @@
     // Now that we know the number and type of the elements, get that number of
     // elements into the Ops array based on what kind of constant it is.
     SmallVector<SDValue, 16> Ops;
-    if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+    if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
       for (unsigned i = 0; i != NumElements; ++i)
         Ops.push_back(getValue(CP->getOperand(i)));
     } else {
@@ -756,7 +773,7 @@
 static void getReturnInfo(const Type* ReturnType,
                    Attributes attr, SmallVectorImpl<EVT> &OutVTs,
                    SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
-                   TargetLowering &TLI,
+                   const TargetLowering &TLI,
                    SmallVectorImpl<uint64_t> *Offsets = 0) {
   SmallVector<EVT, 4> ValueVTs;
   ComputeValueVTs(TLI, ReturnType, ValueVTs);
@@ -811,7 +828,7 @@
   }
 }
 
-void SelectionDAGBuilder::visitRet(ReturnInst &I) {
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
   SDValue Chain = getControlRoot();
   SmallVector<ISD::OutputArg, 8> Outs;
   FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
@@ -916,18 +933,18 @@
 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
 /// created for it, emit nodes to copy the value into the virtual
 /// registers.
-void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) {
-  if (!V->use_empty()) {
-    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
-    if (VMI != FuncInfo.ValueMap.end())
-      CopyValueToVirtualRegister(V, VMI->second);
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+  if (VMI != FuncInfo.ValueMap.end()) {
+    assert(!V->use_empty() && "Unused value assigned virtual registers!");
+    CopyValueToVirtualRegister(V, VMI->second);
   }
 }
 
 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
 /// the current basic block, add it to ValueMap now so that we'll get a
 /// CopyTo/FromReg.
-void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
   // No need to export constants.
   if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
 
@@ -938,11 +955,11 @@
   CopyValueToVirtualRegister(V, Reg);
 }
 
-bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V,
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
                                                      const BasicBlock *FromBB) {
   // The operands of the setcc have to be in this block.  We don't know
   // how to export them from some other block.
-  if (Instruction *VI = dyn_cast<Instruction>(V)) {
+  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
     // Can export from current BB.
     if (VI->getParent() == FromBB)
       return true;
@@ -971,85 +988,31 @@
   return true;
 }
 
-/// getFCmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR floating-point condition code.  This includes
-/// consideration of global floating-point math flags.
-///
-static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
-  ISD::CondCode FPC, FOC;
-  switch (Pred) {
-  case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
-  case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
-  case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
-  case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
-  case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
-  case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
-  case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
-  case FCmpInst::FCMP_ORD:   FOC = FPC = ISD::SETO;   break;
-  case FCmpInst::FCMP_UNO:   FOC = FPC = ISD::SETUO;  break;
-  case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
-  case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
-  case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
-  case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
-  case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
-  case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
-  case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
-  default:
-    llvm_unreachable("Invalid FCmp predicate opcode!");
-    FOC = FPC = ISD::SETFALSE;
-    break;
-  }
-  if (FiniteOnlyFPMath())
-    return FOC;
-  else
-    return FPC;
-}
-
-/// getICmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR integer condition code.
-///
-static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
-  switch (Pred) {
-  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
-  case ICmpInst::ICMP_NE:  return ISD::SETNE;
-  case ICmpInst::ICMP_SLE: return ISD::SETLE;
-  case ICmpInst::ICMP_ULE: return ISD::SETULE;
-  case ICmpInst::ICMP_SGE: return ISD::SETGE;
-  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
-  case ICmpInst::ICMP_SLT: return ISD::SETLT;
-  case ICmpInst::ICMP_ULT: return ISD::SETULT;
-  case ICmpInst::ICMP_SGT: return ISD::SETGT;
-  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
-  default:
-    llvm_unreachable("Invalid ICmp predicate opcode!");
-    return ISD::SETNE;
-  }
-}
-
 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
 /// This function emits a branch and is used at the leaves of an OR or an
 /// AND operator tree.
 ///
 void
-SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
                                                   MachineBasicBlock *TBB,
                                                   MachineBasicBlock *FBB,
-                                                  MachineBasicBlock *CurBB) {
+                                                  MachineBasicBlock *CurBB,
+                                                  MachineBasicBlock *SwitchBB) {
   const BasicBlock *BB = CurBB->getBasicBlock();
 
   // If the leaf of the tree is a comparison, merge the condition into
   // the caseblock.
-  if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
     // The operands of the cmp have to be in this block.  We don't know
     // how to export them from some other block.  If this is the first block
     // of the sequence, no exporting is needed.
-    if (CurBB == CurMBB ||
+    if (CurBB == SwitchBB ||
         (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
          isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
       ISD::CondCode Condition;
-      if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+      if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
         Condition = getICmpCondCode(IC->getPredicate());
-      } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+      } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
         Condition = getFCmpCondCode(FC->getPredicate());
       } else {
         Condition = ISD::SETEQ; // silence warning.
@@ -1070,19 +1033,20 @@
 }
 
 /// FindMergedConditions - If Cond is an expression like
-void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
                                                MachineBasicBlock *TBB,
                                                MachineBasicBlock *FBB,
                                                MachineBasicBlock *CurBB,
+                                               MachineBasicBlock *SwitchBB,
                                                unsigned Opc) {
   // If this node is not part of the or/and tree, emit it as a branch.
-  Instruction *BOp = dyn_cast<Instruction>(Cond);
+  const Instruction *BOp = dyn_cast<Instruction>(Cond);
   if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
       (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
       BOp->getParent() != CurBB->getBasicBlock() ||
       !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
       !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
-    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
+    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
     return;
   }
 
@@ -1102,10 +1066,10 @@
     //
 
     // Emit the LHS condition.
-    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
 
     // Emit the RHS condition into TmpBB.
-    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
   } else {
     assert(Opc == Instruction::And && "Unknown merge op!");
     // Codegen X & Y as:
@@ -1118,10 +1082,10 @@
     //  This requires creation of TmpBB after CurBB.
 
     // Emit the LHS condition.
-    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
 
     // Emit the RHS condition into TmpBB.
-    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
   }
 }
 
@@ -1156,19 +1120,21 @@
   return true;
 }
 
-void SelectionDAGBuilder::visitBr(BranchInst &I) {
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+  MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()];
+
   // Update machine-CFG edges.
   MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
 
   // Figure out which block is immediately after the current one.
   MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
+  MachineFunction::iterator BBI = BrMBB;
   if (++BBI != FuncInfo.MF->end())
     NextBlock = BBI;
 
   if (I.isUnconditional()) {
     // Update machine-CFG edges.
-    CurMBB->addSuccessor(Succ0MBB);
+    BrMBB->addSuccessor(Succ0MBB);
 
     // If this is not a fall-through branch, emit the branch.
     if (Succ0MBB != NextBlock)
@@ -1181,7 +1147,7 @@
 
   // If this condition is one of the special cases we handle, do special stuff
   // now.
-  Value *CondVal = I.getCondition();
+  const Value *CondVal = I.getCondition();
   MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
 
   // If this is a series of conditions that are or'd or and'd together, emit
@@ -1199,15 +1165,16 @@
   //     cmp D, E
   //     jle foo
   //
-  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
     if (BOp->hasOneUse() &&
         (BOp->getOpcode() == Instruction::And ||
          BOp->getOpcode() == Instruction::Or)) {
-      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
+                           BOp->getOpcode());
       // If the compares in later blocks need to use values not currently
       // exported from this block, export them now.  This block should always
       // be the first entry.
-      assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+      assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
 
       // Allow some cases to be rejected.
       if (ShouldEmitAsBranches(SwitchCases)) {
@@ -1217,7 +1184,7 @@
         }
 
         // Emit the branch for this block.
-        visitSwitchCase(SwitchCases[0]);
+        visitSwitchCase(SwitchCases[0], BrMBB);
         SwitchCases.erase(SwitchCases.begin());
         return;
       }
@@ -1233,16 +1200,17 @@
 
   // Create a CaseBlock record representing this branch.
   CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
-               NULL, Succ0MBB, Succ1MBB, CurMBB);
+               NULL, Succ0MBB, Succ1MBB, BrMBB);
 
   // Use visitSwitchCase to actually insert the fast branch sequence for this
   // cond branch.
-  visitSwitchCase(CB);
+  visitSwitchCase(CB, BrMBB);
 }
 
 /// visitSwitchCase - Emits the necessary code to represent a single node in
 /// the binary search tree resulting from lowering a switch instruction.
-void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+                                          MachineBasicBlock *SwitchBB) {
   SDValue Cond;
   SDValue CondLHS = getValue(CB.CmpLHS);
   DebugLoc dl = getCurDebugLoc();
@@ -1281,13 +1249,13 @@
   }
 
   // Update successor info
-  CurMBB->addSuccessor(CB.TrueBB);
-  CurMBB->addSuccessor(CB.FalseBB);
+  SwitchBB->addSuccessor(CB.TrueBB);
+  SwitchBB->addSuccessor(CB.FalseBB);
 
   // Set NextBlock to be the MBB immediately after the current one, if any.
   // This is used to avoid emitting unnecessary branches to the next block.
   MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
+  MachineFunction::iterator BBI = SwitchBB;
   if (++BBI != FuncInfo.MF->end())
     NextBlock = BBI;
 
@@ -1305,11 +1273,11 @@
 
   // If the branch was constant folded, fix up the CFG.
   if (BrCond.getOpcode() == ISD::BR) {
-    CurMBB->removeSuccessor(CB.FalseBB);
+    SwitchBB->removeSuccessor(CB.FalseBB);
   } else {
     // Otherwise, go ahead and insert the false branch.
     if (BrCond == getControlRoot())
-      CurMBB->removeSuccessor(CB.TrueBB);
+      SwitchBB->removeSuccessor(CB.TrueBB);
 
     if (CB.FalseBB != NextBlock)
       BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
@@ -1336,7 +1304,8 @@
 /// visitJumpTableHeader - This function emits necessary code to produce index
 /// in the JumpTable from switch case.
 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
-                                               JumpTableHeader &JTH) {
+                                               JumpTableHeader &JTH,
+                                               MachineBasicBlock *SwitchBB) {
   // Subtract the lowest switch case value from the value being switched on and
   // conditional branch to default mbb if the result is greater than the
   // difference between smallest and largest cases.
@@ -1368,7 +1337,7 @@
   // Set NextBlock to be the MBB immediately after the current one, if any.
   // This is used to avoid emitting unnecessary branches to the next block.
   MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
+  MachineFunction::iterator BBI = SwitchBB;
 
   if (++BBI != FuncInfo.MF->end())
     NextBlock = BBI;
@@ -1386,7 +1355,8 @@
 
 /// visitBitTestHeader - This function emits necessary code to produce value
 /// suitable for "bit tests"
-void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+                                             MachineBasicBlock *SwitchBB) {
   // Subtract the minimum value
   SDValue SwitchOp = getValue(B.SValue);
   EVT VT = SwitchOp.getValueType();
@@ -1409,14 +1379,14 @@
   // Set NextBlock to be the MBB immediately after the current one, if any.
   // This is used to avoid emitting unnecessary branches to the next block.
   MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
+  MachineFunction::iterator BBI = SwitchBB;
   if (++BBI != FuncInfo.MF->end())
     NextBlock = BBI;
 
   MachineBasicBlock* MBB = B.Cases[0].ThisBB;
 
-  CurMBB->addSuccessor(B.Default);
-  CurMBB->addSuccessor(MBB);
+  SwitchBB->addSuccessor(B.Default);
+  SwitchBB->addSuccessor(MBB);
 
   SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                                 MVT::Other, CopyTo, RangeCmp,
@@ -1432,7 +1402,8 @@
 /// visitBitTestCase - this function produces one "bit test"
 void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
                                            unsigned Reg,
-                                           BitTestCase &B) {
+                                           BitTestCase &B,
+                                           MachineBasicBlock *SwitchBB) {
   // Make desired shift
   SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
                                        TLI.getPointerTy());
@@ -1450,8 +1421,8 @@
                                 AndOp, DAG.getConstant(0, TLI.getPointerTy()),
                                 ISD::SETNE);
 
-  CurMBB->addSuccessor(B.TargetBB);
-  CurMBB->addSuccessor(NextMBB);
+  SwitchBB->addSuccessor(B.TargetBB);
+  SwitchBB->addSuccessor(NextMBB);
 
   SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                               MVT::Other, getControlRoot(),
@@ -1460,7 +1431,7 @@
   // Set NextBlock to be the MBB immediately after the current one, if any.
   // This is used to avoid emitting unnecessary branches to the next block.
   MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
+  MachineFunction::iterator BBI = SwitchBB;
   if (++BBI != FuncInfo.MF->end())
     NextBlock = BBI;
 
@@ -1471,7 +1442,9 @@
   DAG.setRoot(BrAnd);
 }
 
-void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+  MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()];
+
   // Retrieve successors.
   MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
   MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
@@ -1487,8 +1460,8 @@
   CopyToExportRegsIfNeeded(&I);
 
   // Update successor info
-  CurMBB->addSuccessor(Return);
-  CurMBB->addSuccessor(LandingPad);
+  InvokeMBB->addSuccessor(Return);
+  InvokeMBB->addSuccessor(LandingPad);
 
   // Drop into normal successor.
   DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
@@ -1496,15 +1469,16 @@
                           DAG.getBasicBlock(Return)));
 }
 
-void SelectionDAGBuilder::visitUnwind(UnwindInst &I) {
+void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
 }
 
 /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
 /// small case ranges).
 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
                                                  CaseRecVector& WorkList,
-                                                 Value* SV,
-                                                 MachineBasicBlock* Default) {
+                                                 const Value* SV,
+                                                 MachineBasicBlock *Default,
+                                                 MachineBasicBlock *SwitchBB) {
   Case& BackCase  = *(CR.Range.second-1);
 
   // Size is the number of Cases represented by this range.
@@ -1557,7 +1531,7 @@
       FallThrough = Default;
     }
 
-    Value *RHS, *LHS, *MHS;
+    const Value *RHS, *LHS, *MHS;
     ISD::CondCode CC;
     if (I->High == I->Low) {
       // This is just small small case range :) containing exactly 1 case
@@ -1573,8 +1547,8 @@
     // code into the current block.  Otherwise, push the CaseBlock onto the
     // vector to be later processed by SDISel, and insert the node's MBB
     // before the next MBB.
-    if (CurBlock == CurMBB)
-      visitSwitchCase(CB);
+    if (CurBlock == SwitchBB)
+      visitSwitchCase(CB, SwitchBB);
     else
       SwitchCases.push_back(CB);
 
@@ -1600,8 +1574,9 @@
 /// handleJTSwitchCase - Emit jumptable for current switch case range
 bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
                                              CaseRecVector& WorkList,
-                                             Value* SV,
-                                             MachineBasicBlock* Default) {
+                                             const Value* SV,
+                                             MachineBasicBlock* Default,
+                                             MachineBasicBlock *SwitchBB) {
   Case& FrontCase = *CR.Range.first;
   Case& BackCase  = *(CR.Range.second-1);
 
@@ -1613,7 +1588,7 @@
        I!=E; ++I)
     TSize += I->size();
 
-  if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
+  if (!areJTsAllowed(TLI) || TSize.ult(4))
     return false;
 
   APInt Range = ComputeRange(First, Last);
@@ -1674,18 +1649,17 @@
     }
   }
 
-  // Create a jump table index for this jump table, or return an existing
-  // one.
+  // Create a jump table index for this jump table.
   unsigned JTEncoding = TLI.getJumpTableEncoding();
   unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
-                       ->getJumpTableIndex(DestBBs);
+                       ->createJumpTableIndex(DestBBs);
 
   // Set the jump table information so that we can codegen it as a second
   // MachineBasicBlock
   JumpTable JT(-1U, JTI, JumpTableBB, Default);
-  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
-  if (CR.CaseBB == CurMBB)
-    visitJumpTableHeader(JT, JTH);
+  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
+  if (CR.CaseBB == SwitchBB)
+    visitJumpTableHeader(JT, JTH, SwitchBB);
 
   JTCases.push_back(JumpTableBlock(JTH, JT));
 
@@ -1696,8 +1670,9 @@
 /// 2 subtrees.
 bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
                                                   CaseRecVector& WorkList,
-                                                  Value* SV,
-                                                  MachineBasicBlock* Default) {
+                                                  const Value* SV,
+                                                  MachineBasicBlock *Default,
+                                                  MachineBasicBlock *SwitchBB) {
   // Get the MachineFunction which holds the current MBB.  This is used when
   // inserting any additional MBBs necessary to represent the switch.
   MachineFunction *CurMF = FuncInfo.MF;
@@ -1811,8 +1786,8 @@
   // Otherwise, branch to LHS.
   CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
 
-  if (CR.CaseBB == CurMBB)
-    visitSwitchCase(CB);
+  if (CR.CaseBB == SwitchBB)
+    visitSwitchCase(CB, SwitchBB);
   else
     SwitchCases.push_back(CB);
 
@@ -1824,8 +1799,9 @@
 /// of masks and emit bit tests with these masks.
 bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
                                                    CaseRecVector& WorkList,
-                                                   Value* SV,
-                                                   MachineBasicBlock* Default){
+                                                   const Value* SV,
+                                                   MachineBasicBlock* Default,
+                                                   MachineBasicBlock *SwitchBB){
   EVT PTy = TLI.getPointerTy();
   unsigned IntPtrBits = PTy.getSizeInBits();
 
@@ -1868,7 +1844,7 @@
                << "Low bound: " << minValue << '\n'
                << "High bound: " << maxValue << '\n');
 
-  if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+  if (cmpRange.uge(IntPtrBits) ||
       (!(Dests.size() == 1 && numCmps >= 3) &&
        !(Dests.size() == 2 && numCmps >= 5) &&
        !(Dests.size() >= 3 && numCmps >= 6)))
@@ -1880,8 +1856,7 @@
   // Optimize the case where all the case values fit in a
   // word without having to subtract minValue. In this case,
   // we can optimize away the subtraction.
-  if (minValue.isNonNegative() &&
-      maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+  if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
     cmpRange = maxValue;
   } else {
     lowBound = minValue;
@@ -1941,11 +1916,11 @@
   }
 
   BitTestBlock BTB(lowBound, cmpRange, SV,
-                   -1U, (CR.CaseBB == CurMBB),
+                   -1U, (CR.CaseBB == SwitchBB),
                    CR.CaseBB, Default, BTC);
 
-  if (CR.CaseBB == CurMBB)
-    visitBitTestHeader(BTB);
+  if (CR.CaseBB == SwitchBB)
+    visitBitTestHeader(BTB, SwitchBB);
 
   BitTestCases.push_back(BTB);
 
@@ -1995,7 +1970,9 @@
   return numCmps;
 }
 
-void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+  MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()];
+
   // Figure out which block is immediately after the current one.
   MachineBasicBlock *NextBlock = 0;
   MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
@@ -2006,7 +1983,7 @@
     // Update machine-CFG edges.
 
     // If this is not a fall-through branch, emit the branch.
-    CurMBB->addSuccessor(Default);
+    SwitchMBB->addSuccessor(Default);
     if (Default != NextBlock)
       DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
                               MVT::Other, getControlRoot(),
@@ -2027,38 +2004,41 @@
   // Get the Value to be switched on and default basic blocks, which will be
   // inserted into CaseBlock records, representing basic blocks in the binary
   // search tree.
-  Value *SV = SI.getOperand(0);
+  const Value *SV = SI.getOperand(0);
 
   // Push the initial CaseRec onto the worklist
   CaseRecVector WorkList;
-  WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+  WorkList.push_back(CaseRec(SwitchMBB,0,0,
+                             CaseRange(Cases.begin(),Cases.end())));
 
   while (!WorkList.empty()) {
     // Grab a record representing a case range to process off the worklist
     CaseRec CR = WorkList.back();
     WorkList.pop_back();
 
-    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
       continue;
 
     // If the range has few cases (two or less) emit a series of specific
     // tests.
-    if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+    if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
       continue;
 
     // If the switch has more than 5 blocks, and at least 40% dense, and the
     // target supports indirect branches, then emit a jump table rather than
     // lowering the switch to a binary tree of conditional branches.
-    if (handleJTSwitchCase(CR, WorkList, SV, Default))
+    if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
       continue;
 
     // Emit binary tree. We need to pick a pivot, and push left and right ranges
     // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
-    handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+    handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
   }
 }
 
-void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()];
+
   // Update machine-CFG edges with unique successors.
   SmallVector<BasicBlock*, 32> succs;
   succs.reserve(I.getNumSuccessors());
@@ -2067,14 +2047,14 @@
   array_pod_sort(succs.begin(), succs.end());
   succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
   for (unsigned i = 0, e = succs.size(); i != e; ++i)
-    CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+    IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
 
   DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
                           MVT::Other, getControlRoot(),
                           getValue(I.getAddress())));
 }
 
-void SelectionDAGBuilder::visitFSub(User &I) {
+void SelectionDAGBuilder::visitFSub(const User &I) {
   // -0.0 - X --> fneg
   const Type *Ty = I.getType();
   if (Ty->isVectorTy()) {
@@ -2104,14 +2084,14 @@
   visitBinary(I, ISD::FSUB);
 }
 
-void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
   setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
                            Op1.getValueType(), Op1, Op2));
 }
 
-void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
   if (!I.getType()->isVectorTy() &&
@@ -2145,11 +2125,11 @@
                            Op1.getValueType(), Op1, Op2));
 }
 
-void SelectionDAGBuilder::visitICmp(User &I) {
+void SelectionDAGBuilder::visitICmp(const User &I) {
   ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
-  if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
     predicate = IC->getPredicate();
-  else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
     predicate = ICmpInst::Predicate(IC->getPredicate());
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
@@ -2159,11 +2139,11 @@
   setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
 }
 
-void SelectionDAGBuilder::visitFCmp(User &I) {
+void SelectionDAGBuilder::visitFCmp(const User &I) {
   FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
-  if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
     predicate = FC->getPredicate();
-  else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
     predicate = FCmpInst::Predicate(FC->getPredicate());
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
@@ -2172,7 +2152,7 @@
   setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
 }
 
-void SelectionDAGBuilder::visitSelect(User &I) {
+void SelectionDAGBuilder::visitSelect(const User &I) {
   SmallVector<EVT, 4> ValueVTs;
   ComputeValueVTs(TLI, I.getType(), ValueVTs);
   unsigned NumValues = ValueVTs.size();
@@ -2185,7 +2165,8 @@
 
   for (unsigned i = 0; i != NumValues; ++i)
     Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
-                            TrueVal.getNode()->getValueType(i), Cond,
+                          TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+                            Cond,
                             SDValue(TrueVal.getNode(),
                                     TrueVal.getResNo() + i),
                             SDValue(FalseVal.getNode(),
@@ -2196,14 +2177,14 @@
                            &Values[0], NumValues));
 }
 
-void SelectionDAGBuilder::visitTrunc(User &I) {
+void SelectionDAGBuilder::visitTrunc(const User &I) {
   // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
   SDValue N = getValue(I.getOperand(0));
   EVT DestVT = TLI.getValueType(I.getType());
   setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
 }
 
-void SelectionDAGBuilder::visitZExt(User &I) {
+void SelectionDAGBuilder::visitZExt(const User &I) {
   // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
   // ZExt also can't be a cast to bool for same reason. So, nothing much to do
   SDValue N = getValue(I.getOperand(0));
@@ -2211,7 +2192,7 @@
   setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
 }
 
-void SelectionDAGBuilder::visitSExt(User &I) {
+void SelectionDAGBuilder::visitSExt(const User &I) {
   // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
   // SExt also can't be a cast to bool for same reason. So, nothing much to do
   SDValue N = getValue(I.getOperand(0));
@@ -2219,7 +2200,7 @@
   setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
 }
 
-void SelectionDAGBuilder::visitFPTrunc(User &I) {
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
   // FPTrunc is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
   EVT DestVT = TLI.getValueType(I.getType());
@@ -2227,42 +2208,42 @@
                            DestVT, N, DAG.getIntPtrConstant(0)));
 }
 
-void SelectionDAGBuilder::visitFPExt(User &I){
+void SelectionDAGBuilder::visitFPExt(const User &I){
   // FPTrunc is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
   EVT DestVT = TLI.getValueType(I.getType());
   setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
 }
 
-void SelectionDAGBuilder::visitFPToUI(User &I) {
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
   // FPToUI is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
   EVT DestVT = TLI.getValueType(I.getType());
   setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
 }
 
-void SelectionDAGBuilder::visitFPToSI(User &I) {
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
   // FPToSI is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
   EVT DestVT = TLI.getValueType(I.getType());
   setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
 }
 
-void SelectionDAGBuilder::visitUIToFP(User &I) {
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
   // UIToFP is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
   EVT DestVT = TLI.getValueType(I.getType());
   setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
 }
 
-void SelectionDAGBuilder::visitSIToFP(User &I){
+void SelectionDAGBuilder::visitSIToFP(const User &I){
   // SIToFP is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
   EVT DestVT = TLI.getValueType(I.getType());
   setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
 }
 
-void SelectionDAGBuilder::visitPtrToInt(User &I) {
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
   // What to do depends on the size of the integer and the size of the pointer.
   // We can either truncate, zero extend, or no-op, accordingly.
   SDValue N = getValue(I.getOperand(0));
@@ -2271,7 +2252,7 @@
   setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
 }
 
-void SelectionDAGBuilder::visitIntToPtr(User &I) {
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
   // What to do depends on the size of the integer and the size of the pointer.
   // We can either truncate, zero extend, or no-op, accordingly.
   SDValue N = getValue(I.getOperand(0));
@@ -2280,7 +2261,7 @@
   setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
 }
 
-void SelectionDAGBuilder::visitBitCast(User &I) {
+void SelectionDAGBuilder::visitBitCast(const User &I) {
   SDValue N = getValue(I.getOperand(0));
   EVT DestVT = TLI.getValueType(I.getType());
 
@@ -2293,7 +2274,7 @@
     setValue(&I, N);            // noop cast.
 }
 
-void SelectionDAGBuilder::visitInsertElement(User &I) {
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
   SDValue InVec = getValue(I.getOperand(0));
   SDValue InVal = getValue(I.getOperand(1));
   SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
@@ -2304,7 +2285,7 @@
                            InVec, InVal, InIdx));
 }
 
-void SelectionDAGBuilder::visitExtractElement(User &I) {
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
   SDValue InVec = getValue(I.getOperand(0));
   SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
                               TLI.getPointerTy(),
@@ -2323,7 +2304,7 @@
   return true;
 }
 
-void SelectionDAGBuilder::visitShuffleVector(User &I) {
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
   SmallVector<int, 8> Mask;
   SDValue Src1 = getValue(I.getOperand(0));
   SDValue Src2 = getValue(I.getOperand(1));
@@ -2504,7 +2485,7 @@
                            VT, &Ops[0], Ops.size()));
 }
 
-void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) {
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
   const Value *Op0 = I.getOperand(0);
   const Value *Op1 = I.getOperand(1);
   const Type *AggTy = I.getType();
@@ -2545,7 +2526,7 @@
                            &Values[0], NumAggValues));
 }
 
-void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) {
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
   const Value *Op0 = I.getOperand(0);
   const Type *AggTy = Op0->getType();
   const Type *ValTy = I.getType();
@@ -2573,13 +2554,13 @@
                            &Values[0], NumValValues));
 }
 
-void SelectionDAGBuilder::visitGetElementPtr(User &I) {
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
   SDValue N = getValue(I.getOperand(0));
   const Type *Ty = I.getOperand(0)->getType();
 
-  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+  for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
        OI != E; ++OI) {
-    Value *Idx = *OI;
+    const Value *Idx = *OI;
     if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
       if (Field) {
@@ -2590,11 +2571,16 @@
       }
 
       Ty = StTy->getElementType(Field);
+    } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      
+      // Offset canonically 0 for unions, but type changes
+      Ty = UnTy->getElementType(Field);
     } else {
       Ty = cast<SequentialType>(Ty)->getElementType();
 
       // If this is a constant subscript, handle it quickly.
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->getZExtValue() == 0) continue;
         uint64_t Offs =
             TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
@@ -2645,7 +2631,7 @@
   setValue(&I, N);
 }
 
-void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
   // If this is a fixed sized alloca in the entry block of the function,
   // allocate it statically on the stack.
   if (FuncInfo.StaticAllocaMap.count(&I))
@@ -2669,8 +2655,7 @@
   // Handle alignment.  If the requested alignment is less than or equal to
   // the stack alignment, ignore it.  If the size is greater than or equal to
   // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
-  unsigned StackAlign =
-    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
   if (Align <= StackAlign)
     Align = 0;
 
@@ -2697,7 +2682,7 @@
   FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
 }
 
-void SelectionDAGBuilder::visitLoad(LoadInst &I) {
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   const Value *SV = I.getOperand(0);
   SDValue Ptr = getValue(SV);
 
@@ -2757,9 +2742,9 @@
                            &Values[0], NumValues));
 }
 
-void SelectionDAGBuilder::visitStore(StoreInst &I) {
-  Value *SrcV = I.getOperand(0);
-  Value *PtrV = I.getOperand(1);
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+  const Value *SrcV = I.getOperand(0);
+  const Value *PtrV = I.getOperand(1);
 
   SmallVector<EVT, 4> ValueVTs;
   SmallVector<uint64_t, 4> Offsets;
@@ -2796,7 +2781,7 @@
 
 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
 /// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
                                                unsigned Intrinsic) {
   bool HasChain = !I.doesNotAccessMemory();
   bool OnlyLoad = HasChain && I.onlyReadsMemory();
@@ -2922,7 +2907,8 @@
 /// visitIntrinsicCall: I is a call instruction
 ///                     Op is the associated NodeType for I
 const char *
-SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
+                                           ISD::NodeType Op) {
   SDValue Root = getRoot();
   SDValue L =
     DAG.getAtomic(Op, getCurDebugLoc(),
@@ -2938,7 +2924,7 @@
 
 // implVisitAluOverflow - Lower arithmetic overflow instrinsics.
 const char *
-SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
   SDValue Op1 = getValue(I.getOperand(1));
   SDValue Op2 = getValue(I.getOperand(2));
 
@@ -2950,7 +2936,7 @@
 /// visitExp - Lower an exp intrinsic. Handles the special sequences for
 /// limited-precision mode.
 void
-SelectionDAGBuilder::visitExp(CallInst &I) {
+SelectionDAGBuilder::visitExp(const CallInst &I) {
   SDValue result;
   DebugLoc dl = getCurDebugLoc();
 
@@ -3076,7 +3062,7 @@
 /// visitLog - Lower a log intrinsic. Handles the special sequences for
 /// limited-precision mode.
 void
-SelectionDAGBuilder::visitLog(CallInst &I) {
+SelectionDAGBuilder::visitLog(const CallInst &I) {
   SDValue result;
   DebugLoc dl = getCurDebugLoc();
 
@@ -3186,7 +3172,7 @@
 /// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
 /// limited-precision mode.
 void
-SelectionDAGBuilder::visitLog2(CallInst &I) {
+SelectionDAGBuilder::visitLog2(const CallInst &I) {
   SDValue result;
   DebugLoc dl = getCurDebugLoc();
 
@@ -3295,7 +3281,7 @@
 /// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
 /// limited-precision mode.
 void
-SelectionDAGBuilder::visitLog10(CallInst &I) {
+SelectionDAGBuilder::visitLog10(const CallInst &I) {
   SDValue result;
   DebugLoc dl = getCurDebugLoc();
 
@@ -3397,7 +3383,7 @@
 /// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
 /// limited-precision mode.
 void
-SelectionDAGBuilder::visitExp2(CallInst &I) {
+SelectionDAGBuilder::visitExp2(const CallInst &I) {
   SDValue result;
   DebugLoc dl = getCurDebugLoc();
 
@@ -3511,9 +3497,9 @@
 /// visitPow - Lower a pow intrinsic. Handles the special sequences for
 /// limited-precision mode with x == 10.0f.
 void
-SelectionDAGBuilder::visitPow(CallInst &I) {
+SelectionDAGBuilder::visitPow(const CallInst &I) {
   SDValue result;
-  Value *Val = I.getOperand(1);
+  const Value *Val = I.getOperand(1);
   DebugLoc dl = getCurDebugLoc();
   bool IsExp10 = false;
 
@@ -3659,7 +3645,7 @@
     if (Val == 0)
       return DAG.getConstantFP(1.0, LHS.getValueType());
 
-    Function *F = DAG.getMachineFunction().getFunction();
+    const Function *F = DAG.getMachineFunction().getFunction();
     if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
         // If optimizing for size, don't insert too many multiplies.  This
         // inserts up to 5 multiplies.
@@ -3700,7 +3686,7 @@
 /// we want to emit this as a call to a named external function, return the name
 /// otherwise lower it and return null.
 const char *
-SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   DebugLoc dl = getCurDebugLoc();
   SDValue Res;
 
@@ -3725,28 +3711,50 @@
   case Intrinsic::longjmp:
     return "_longjmp"+!TLI.usesUnderscoreLongJmp();
   case Intrinsic::memcpy: {
+    // Assert for address < 256 since we support only user defined address
+    // spaces.
+    assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+           < 256 &&
+           cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace()
+           < 256 &&
+           "Unknown address space");
     SDValue Op1 = getValue(I.getOperand(1));
     SDValue Op2 = getValue(I.getOperand(2));
     SDValue Op3 = getValue(I.getOperand(3));
     unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
-    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+    bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
                               I.getOperand(1), 0, I.getOperand(2), 0));
     return 0;
   }
   case Intrinsic::memset: {
+    // Assert for address < 256 since we support only user defined address
+    // spaces.
+    assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+           < 256 &&
+           "Unknown address space");
     SDValue Op1 = getValue(I.getOperand(1));
     SDValue Op2 = getValue(I.getOperand(2));
     SDValue Op3 = getValue(I.getOperand(3));
     unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
-    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
+    bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
                               I.getOperand(1), 0));
     return 0;
   }
   case Intrinsic::memmove: {
+    // Assert for address < 256 since we support only user defined address
+    // spaces.
+    assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+           < 256 &&
+           cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace()
+           < 256 &&
+           "Unknown address space");
     SDValue Op1 = getValue(I.getOperand(1));
     SDValue Op2 = getValue(I.getOperand(2));
     SDValue Op3 = getValue(I.getOperand(3));
     unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+    bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
 
     // If the source and destination are known to not be aliases, we can
     // lower memmove as memcpy.
@@ -3755,69 +3763,128 @@
       Size = C->getZExtValue();
     if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
         AliasAnalysis::NoAlias) {
-      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
-                                I.getOperand(1), 0, I.getOperand(2), 0));
+      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, 
+                                false, I.getOperand(1), 0, I.getOperand(2), 0));
       return 0;
     }
 
-    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
+    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
                                I.getOperand(1), 0, I.getOperand(2), 0));
     return 0;
   }
   case Intrinsic::dbg_declare: {
-    // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None.
-    // The real handling of this intrinsic is in FastISel.
-    if (OptLevel != CodeGenOpt::None)
-      // FIXME: Variable debug info is not supported here.
-      return 0;
-    DwarfWriter *DW = DAG.getDwarfWriter();
-    if (!DW)
-      return 0;
-    DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+    const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
     if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
       return 0;
 
     MDNode *Variable = DI.getVariable();
-    Value *Address = DI.getAddress();
+    // Parameters are handled specially.
+    bool isParameter = false;
+    ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Variable->getOperand(0));
+    if (CI) {
+      unsigned Val = CI->getZExtValue();
+      unsigned Tag = Val & ~LLVMDebugVersionMask;
+      if (Tag == dwarf::DW_TAG_arg_variable)
+        isParameter = true;
+    }
+    const Value *Address = DI.getAddress();
     if (!Address)
       return 0;
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
       Address = BCI->getOperand(0);
-    AllocaInst *AI = dyn_cast<AllocaInst>(Address);
-    // Don't handle byval struct arguments or VLAs, for example.
-    if (!AI)
-      return 0;
-    DenseMap<const AllocaInst*, int>::iterator SI =
-      FuncInfo.StaticAllocaMap.find(AI);
-    if (SI == FuncInfo.StaticAllocaMap.end())
-      return 0; // VLAs.
-    int FI = SI->second;
+    const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+    if (AI) {
+      // Don't handle byval arguments or VLAs, for example.
+      // Non-byval arguments are handled here (they refer to the stack temporary
+      // alloca at this point).
+      DenseMap<const AllocaInst*, int>::iterator SI =
+        FuncInfo.StaticAllocaMap.find(AI);
+      if (SI == FuncInfo.StaticAllocaMap.end())
+        return 0; // VLAs.
+      int FI = SI->second;
 
-    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo())
-      if (MDNode *Dbg = DI.getMetadata("dbg"))
-        MMI->setVariableDbgInfo(Variable, FI, Dbg);
+      MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+      if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+        MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+    }
+
+    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
+    // but do not always have a corresponding SDNode built.  The SDNodeOrder
+    // absolute, but not relative, values are different depending on whether
+    // debug info exists.
+    ++SDNodeOrder;
+    SDValue &N = NodeMap[Address];
+    SDDbgValue *SDV;
+    if (N.getNode()) {
+      if (isParameter && !AI) {
+        FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+        if (FINode)
+          // Byval parameter.  We have a frame index at this point.
+          SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
+                                0, dl, SDNodeOrder);
+        else
+          // Can't do anything with other non-AI cases yet.  This might be a
+          // parameter of a callee function that got inlined, for example.
+          return 0;
+      } else if (AI)
+        SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+                              0, dl, SDNodeOrder);
+      else
+        // Can't do anything with other non-AI cases yet.
+        return 0;
+      DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+    } else {
+      // Generating Undefs here seems to be actively harmful because it
+      // affects the line numbers.
+      return 0;
+#if 0
+      // This isn't useful, but it shows what we're missing.
+      SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+                            0, dl, SDNodeOrder);
+      DAG.AddDbgValue(SDV, 0, isParameter);
+#endif
+    }
     return 0;
   }
   case Intrinsic::dbg_value: {
-    // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None.
-    // The real handling of this intrinsic is in FastISel.
-    if (OptLevel != CodeGenOpt::None)
-      // FIXME: Variable debug info is not supported here.
-      return 0;
-    DwarfWriter *DW = DAG.getDwarfWriter();
-    if (!DW)
-      return 0;
-    DbgValueInst &DI = cast<DbgValueInst>(I);
+    const DbgValueInst &DI = cast<DbgValueInst>(I);
     if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
       return 0;
 
     MDNode *Variable = DI.getVariable();
-    Value *V = DI.getValue();
+    uint64_t Offset = DI.getOffset();
+    const Value *V = DI.getValue();
     if (!V)
       return 0;
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+
+    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
+    // but do not always have a corresponding SDNode built.  The SDNodeOrder
+    // absolute, but not relative, values are different depending on whether
+    // debug info exists.
+    ++SDNodeOrder;
+    SDDbgValue *SDV;
+    if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+      SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+      DAG.AddDbgValue(SDV, 0, false);
+    } else {
+      SDValue &N = NodeMap[V];
+      if (N.getNode()) {
+        SDV = DAG.getDbgValue(Variable, N.getNode(),
+                              N.getResNo(), Offset, dl, SDNodeOrder);
+        DAG.AddDbgValue(SDV, N.getNode(), false);
+      } else {
+        // We may expand this to cover more cases.  One case where we have no
+        // data available is an unreferenced parameter; we need this fallback.
+        SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
+                              Offset, dl, SDNodeOrder);
+        DAG.AddDbgValue(SDV, 0, false);
+      }
+    }
+
+    // Build a debug info table entry.
+    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
       V = BCI->getOperand(0);
-    AllocaInst *AI = dyn_cast<AllocaInst>(V);
+    const AllocaInst *AI = dyn_cast<AllocaInst>(V);
     // Don't handle byval struct arguments or VLAs, for example.
     if (!AI)
       return 0;
@@ -3826,14 +3893,16 @@
     if (SI == FuncInfo.StaticAllocaMap.end())
       return 0; // VLAs.
     int FI = SI->second;
-    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo())
-      if (MDNode *Dbg = DI.getMetadata("dbg"))
-        MMI->setVariableDbgInfo(Variable, FI, Dbg);
+    
+    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+    if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+      MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
     return 0;
   }
   case Intrinsic::eh_exception: {
     // Insert the EXCEPTIONADDR instruction.
-    assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
+    assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() &&
+           "Call to eh.exception not in landing pad!");
     SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
     SDValue Ops[1];
     Ops[0] = DAG.getRoot();
@@ -3844,17 +3913,17 @@
   }
 
   case Intrinsic::eh_selector: {
-    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
-
-    if (CurMBB->isLandingPad())
-      AddCatchInfo(I, MMI, CurMBB);
+    MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()];
+    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+    if (CallMBB->isLandingPad())
+      AddCatchInfo(I, &MMI, CallMBB);
     else {
 #ifndef NDEBUG
       FuncInfo.CatchInfoLost.insert(&I);
 #endif
       // FIXME: Mark exception selector register as live in.  Hack for PR1508.
       unsigned Reg = TLI.getExceptionSelectorRegister();
-      if (Reg) CurMBB->addLiveIn(Reg);
+      if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg);
     }
 
     // Insert the EHSELECTION instruction.
@@ -3869,40 +3938,25 @@
   }
 
   case Intrinsic::eh_typeid_for: {
-    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
-
-    if (MMI) {
-      // Find the type id for the given typeinfo.
-      GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
-      unsigned TypeID = MMI->getTypeIDFor(GV);
-      Res = DAG.getConstant(TypeID, MVT::i32);
-    } else {
-      // Return something different to eh_selector.
-      Res = DAG.getConstant(1, MVT::i32);
-    }
-
+    // Find the type id for the given typeinfo.
+    GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+    unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+    Res = DAG.getConstant(TypeID, MVT::i32);
     setValue(&I, Res);
     return 0;
   }
 
   case Intrinsic::eh_return_i32:
   case Intrinsic::eh_return_i64:
-    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
-      MMI->setCallsEHReturn(true);
-      DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
-                              MVT::Other,
-                              getControlRoot(),
-                              getValue(I.getOperand(1)),
-                              getValue(I.getOperand(2))));
-    } else {
-      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
-    }
-
+    DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+    DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+                            MVT::Other,
+                            getControlRoot(),
+                            getValue(I.getOperand(1)),
+                            getValue(I.getOperand(2))));
     return 0;
   case Intrinsic::eh_unwind_init:
-    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
-      MMI->setCallsUnwindInit(true);
-    }
+    DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
     return 0;
   case Intrinsic::eh_dwarf_cfa: {
     EVT VT = getValue(I.getOperand(1)).getValueType();
@@ -3921,12 +3975,12 @@
     return 0;
   }
   case Intrinsic::eh_sjlj_callsite: {
-    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
     ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
     assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
-    assert(MMI->getCurrentCallSite() == 0 && "Overlapping call sites!");
+    assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
 
-    MMI->setCurrentCallSite(CI->getZExtValue());
+    MMI.setCurrentCallSite(CI->getZExtValue());
     return 0;
   }
 
@@ -3952,7 +4006,7 @@
     case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
     }
     EVT DestVT = TLI.getValueType(I.getType());
-    Value *Op1 = I.getOperand(1);
+    const Value *Op1 = I.getOperand(1);
     Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
                                DAG.getValueType(DestVT),
                                DAG.getValueType(getValue(Op1).getValueType()),
@@ -3999,6 +4053,14 @@
   case Intrinsic::pow:
     visitPow(I);
     return 0;
+  case Intrinsic::convert_to_fp16:
+    setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
+                             MVT::i16, getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::convert_from_fp16:
+    setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
+                             MVT::f32, getValue(I.getOperand(1))));
+    return 0;
   case Intrinsic::pcmarker: {
     SDValue Tmp = getValue(I.getOperand(1));
     DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
@@ -4113,8 +4175,8 @@
   }
   case Intrinsic::gcroot:
     if (GFI) {
-      Value *Alloca = I.getOperand(1);
-      Constant *TypeMap = cast<Constant>(I.getOperand(2));
+      const Value *Alloca = I.getOperand(1);
+      const Constant *TypeMap = cast<Constant>(I.getOperand(2));
 
       FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
       GFI->addStackRoot(FI->getIndex(), TypeMap);
@@ -4211,97 +4273,14 @@
   }
 }
 
-/// Test if the given instruction is in a position to be optimized
-/// with a tail-call. This roughly means that it's in a block with
-/// a return and there's nothing that needs to be scheduled
-/// between it and the return.
-///
-/// This function only tests target-independent requirements.
-static bool
-isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr,
-                     const TargetLowering &TLI) {
-  const Instruction *I = CS.getInstruction();
-  const BasicBlock *ExitBB = I->getParent();
-  const TerminatorInst *Term = ExitBB->getTerminator();
-  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
-  const Function *F = ExitBB->getParent();
-
-  // The block must end in a return statement or unreachable.
-  //
-  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
-  // an unreachable, for now. The way tailcall optimization is currently
-  // implemented means it will add an epilogue followed by a jump. That is
-  // not profitable. Also, if the callee is a special function (e.g.
-  // longjmp on x86), it can end up causing miscompilation that has not
-  // been fully understood.
-  if (!Ret &&
-      (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
-
-  // If I will have a chain, make sure no other instruction that will have a
-  // chain interposes between I and the return.
-  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
-      !I->isSafeToSpeculativelyExecute())
-    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
-         --BBI) {
-      if (&*BBI == I)
-        break;
-      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
-          !BBI->isSafeToSpeculativelyExecute())
-        return false;
-    }
-
-  // If the block ends with a void return or unreachable, it doesn't matter
-  // what the call's return type is.
-  if (!Ret || Ret->getNumOperands() == 0) return true;
-
-  // If the return value is undef, it doesn't matter what the call's
-  // return type is.
-  if (isa<UndefValue>(Ret->getOperand(0))) return true;
-
-  // Conservatively require the attributes of the call to match those of
-  // the return. Ignore noalias because it doesn't affect the call sequence.
-  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
-  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
-    return false;
-
-  // It's not safe to eliminate the sign / zero extension of the return value.
-  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
-    return false;
-
-  // Otherwise, make sure the unmodified return value of I is the return value.
-  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
-       U = dyn_cast<Instruction>(U->getOperand(0))) {
-    if (!U)
-      return false;
-    if (!U->hasOneUse())
-      return false;
-    if (U == I)
-      break;
-    // Check for a truly no-op truncate.
-    if (isa<TruncInst>(U) &&
-        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
-      continue;
-    // Check for a truly no-op bitcast.
-    if (isa<BitCastInst>(U) &&
-        (U->getOperand(0)->getType() == U->getType() ||
-         (U->getOperand(0)->getType()->isPointerTy() &&
-          U->getType()->isPointerTy())))
-      continue;
-    // Otherwise it's not a true no-op.
-    return false;
-  }
-
-  return true;
-}
-
-void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                       bool isTailCall,
                                       MachineBasicBlock *LandingPad) {
   const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
   const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
   const Type *RetTy = FTy->getReturnType();
-  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
-  unsigned BeginLabel = 0, EndLabel = 0;
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  MCSymbol *BeginLabel = 0;
 
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
@@ -4342,7 +4321,7 @@
     RetTy = Type::getVoidTy(FTy->getContext());
   }
 
-  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
        i != e; ++i) {
     SDValue ArgNode = getValue(*i);
     Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
@@ -4358,25 +4337,24 @@
     Args.push_back(Entry);
   }
 
-  if (LandingPad && MMI) {
+  if (LandingPad) {
     // Insert a label before the invoke call to mark the try range.  This can be
     // used to detect deletion of the invoke via the MachineModuleInfo.
-    BeginLabel = MMI->NextLabelID();
+    BeginLabel = MMI.getContext().CreateTempSymbol();
 
     // For SjLj, keep track of which landing pads go with which invokes
     // so as to maintain the ordering of pads in the LSDA.
-    unsigned CallSiteIndex = MMI->getCurrentCallSite();
+    unsigned CallSiteIndex = MMI.getCurrentCallSite();
     if (CallSiteIndex) {
-      MMI->setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+      MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
       // Now that the call site is handled, stop tracking it.
-      MMI->setCurrentCallSite(0);
+      MMI.setCurrentCallSite(0);
     }
 
     // Both PendingLoads and PendingExports must be flushed here;
     // this call might not return.
     (void)getRoot();
-    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
-                             getControlRoot(), BeginLabel));
+    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
   }
 
   // Check if target-independent constraints permit a tail call here.
@@ -4461,26 +4439,25 @@
   else
     HasTailCall = true;
 
-  if (LandingPad && MMI) {
+  if (LandingPad) {
     // Insert a label at the end of the invoke call to mark the try range.  This
     // can be used to detect deletion of the invoke via the MachineModuleInfo.
-    EndLabel = MMI->NextLabelID();
-    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
-                             getRoot(), EndLabel));
+    MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
+    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
 
     // Inform MachineModuleInfo of range.
-    MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
+    MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
   }
 }
 
 /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
 /// value is equal or not-equal to zero.
-static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
+  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
        UI != E; ++UI) {
-    if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+    if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
       if (IC->isEquality())
-        if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+        if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
           if (C->isNullValue())
             continue;
     // Unknown instruction.
@@ -4489,17 +4466,20 @@
   return true;
 }
 
-static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+                             const Type *LoadTy,
                              SelectionDAGBuilder &Builder) {
 
   // Check to see if this load can be trivially constant folded, e.g. if the
   // input is from a string literal.
-  if (Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+  if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
     // Cast pointer to the type we really want to load.
-    LoadInput = ConstantExpr::getBitCast(LoadInput,
+    LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
                                          PointerType::getUnqual(LoadTy));
 
-    if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD))
+    if (const Constant *LoadCst =
+          ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+                                       Builder.TD))
       return Builder.getValue(LoadCst);
   }
 
@@ -4532,18 +4512,18 @@
 /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
 /// If so, return true and lower it, otherwise return false and it will be
 /// lowered like a normal call.
-bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
   // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
   if (I.getNumOperands() != 4)
     return false;
 
-  Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
+  const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
   if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
       !I.getOperand(3)->getType()->isIntegerTy() ||
       !I.getType()->isIntegerTy())
     return false;
 
-  ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
+  const ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
 
   // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
   // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
@@ -4609,11 +4589,11 @@
 }
 
 
-void SelectionDAGBuilder::visitCall(CallInst &I) {
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
   const char *RenameFn = 0;
   if (Function *F = I.getCalledFunction()) {
     if (F->isDeclaration()) {
-      const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
+      const TargetIntrinsicInfo *II = TM.getIntrinsicInfo();
       if (II) {
         if (unsigned IID = II->getIntrinsicID(F)) {
           RenameFn = visitIntrinsicCall(I, IID);
@@ -4632,7 +4612,7 @@
     // can't be a library call.
     if (!F->hasLocalLinkage() && F->hasName()) {
       StringRef Name = F->getName();
-      if (Name == "copysign" || Name == "copysignf") {
+      if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
         if (I.getNumOperands() == 3 &&   // Basic sanity checks.
             I.getOperand(1)->getType()->isFloatingPointTy() &&
             I.getType() == I.getOperand(1)->getType() &&
@@ -4837,14 +4817,13 @@
 /// AddInlineAsmOperands - Add this value to the specified inlineasm node
 /// operand list.  This adds the code marker and includes the number of
 /// values added into it.
-void RegsForValue::AddInlineAsmOperands(unsigned Code,
-                                        bool HasMatching,unsigned MatchingIdx,
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+                                        unsigned MatchingIdx,
                                         SelectionDAG &DAG,
                                         std::vector<SDValue> &Ops) const {
-  assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
-  unsigned Flag = Code | (Regs.size() << 3);
+  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
   if (HasMatching)
-    Flag |= 0x80000000 | (MatchingIdx << 16);
+    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
   SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
   Ops.push_back(Res);
 
@@ -4960,7 +4939,7 @@
     if (isIndirect) {
       const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
       if (!PtrTy)
-        llvm_report_error("Indirect operand for inline asm not a pointer!");
+        report_fatal_error("Indirect operand for inline asm not a pointer!");
       OpTy = PtrTy->getElementType();
     }
 
@@ -5180,31 +5159,10 @@
   // Otherwise, we couldn't allocate enough registers for this.
 }
 
-/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
-/// processed uses a memory 'm' constraint.
-static bool
-hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
-                          const TargetLowering &TLI) {
-  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
-    InlineAsm::ConstraintInfo &CI = CInfos[i];
-    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
-      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
-      if (CType == TargetLowering::C_Memory)
-        return true;
-    }
-
-    // Indirect operand accesses access memory.
-    if (CI.isIndirect)
-      return true;
-  }
-
-  return false;
-}
-
 /// visitInlineAsm - Handle a call to an InlineAsm object.
 ///
-void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
-  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
 
   /// ConstraintOperands - Information about all of the constraints.
   std::vector<SDISelAsmOperandInfo> ConstraintOperands;
@@ -5240,7 +5198,7 @@
     case InlineAsm::isOutput:
       // Indirect outputs just consume an argument.
       if (OpInfo.isIndirect) {
-        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
         break;
       }
 
@@ -5257,7 +5215,7 @@
       ++ResNo;
       break;
     case InlineAsm::isInput:
-      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
       break;
     case InlineAsm::isClobber:
       // Nothing to do.
@@ -5270,7 +5228,7 @@
       // Strip bitcasts, if any.  This mostly comes up for functions.
       OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
 
-      if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+      if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
         OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
       } else {
         OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
@@ -5293,14 +5251,15 @@
     // error.
     if (OpInfo.hasMatchingInput()) {
       SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+      
       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
         if ((OpInfo.ConstraintVT.isInteger() !=
              Input.ConstraintVT.isInteger()) ||
             (OpInfo.ConstraintVT.getSizeInBits() !=
              Input.ConstraintVT.getSizeInBits())) {
-          llvm_report_error("Unsupported asm: input constraint"
-                            " with a matching output constraint of incompatible"
-                            " type!");
+          report_fatal_error("Unsupported asm: input constraint"
+                             " with a matching output constraint of"
+                             " incompatible type!");
         }
         Input.ConstraintVT = OpInfo.ConstraintVT;
       }
@@ -5322,7 +5281,7 @@
 
       // If the operand is a float, integer, or vector constant, spill to a
       // constant pool entry to get its address.
-      Value *OpVal = OpInfo.CallOperandVal;
+      const Value *OpVal = OpInfo.CallOperandVal;
       if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
           isa<ConstantVector>(OpVal)) {
         OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
@@ -5375,6 +5334,11 @@
           DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
                                       TLI.getPointerTy()));
 
+  // If we have a !srcloc metadata node associated with it, we want to attach
+  // this to the ultimately generated inline asm machineinstr.  To do this, we
+  // pass in the third operand as this (potentially null) inline asm MDNode.
+  const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
 
   // Loop over all of the inputs, copying the operand values into the
   // appropriate registers and processing the output regs.
@@ -5394,8 +5358,8 @@
         assert(OpInfo.isIndirect && "Memory output must be indirect operand");
 
         // Add information to the INLINEASM node to know about this output.
-        unsigned ResOpType = 4/*MEM*/ | (1<<3);
-        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+        unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
                                                         TLI.getPointerTy()));
         AsmNodeOperands.push_back(OpInfo.CallOperand);
         break;
@@ -5405,10 +5369,9 @@
 
       // Copy the output from the appropriate register.  Find a register that
       // we can use.
-      if (OpInfo.AssignedRegs.Regs.empty()) {
-        llvm_report_error("Couldn't allocate output reg for"
-                          " constraint '" + OpInfo.ConstraintCode + "'!");
-      }
+      if (OpInfo.AssignedRegs.Regs.empty())
+        report_fatal_error("Couldn't allocate output reg for constraint '" +
+                           Twine(OpInfo.ConstraintCode) + "'!");
 
       // If this is an indirect operand, store through the pointer after the
       // asm.
@@ -5425,8 +5388,8 @@
       // Add information to the INLINEASM node to know that this register is
       // set.
       OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
-                                               6 /* EARLYCLOBBER REGDEF */ :
-                                               2 /* REGDEF */ ,
+                                           InlineAsm::Kind_RegDefEarlyClobber :
+                                               InlineAsm::Kind_RegDef,
                                                false,
                                                0,
                                                DAG,
@@ -5443,27 +5406,30 @@
 
         // Scan until we find the definition we already emitted of this operand.
         // When we find it, create a RegsForValue operand.
-        unsigned CurOp = 2;  // The first operand.
+        unsigned CurOp = InlineAsm::Op_FirstOperand;
         for (; OperandNo; --OperandNo) {
           // Advance to the next operand.
           unsigned OpFlag =
             cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
-          assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
-                  (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
-                  (OpFlag & 7) == 4 /*MEM*/) &&
-                 "Skipped past definitions?");
+          assert((InlineAsm::isRegDefKind(OpFlag) ||
+                  InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+                  InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
           CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
         }
 
         unsigned OpFlag =
           cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
-        if ((OpFlag & 7) == 2 /*REGDEF*/
-            || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
+        if (InlineAsm::isRegDefKind(OpFlag) ||
+            InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
           // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
           if (OpInfo.isIndirect) {
-            llvm_report_error("Don't know how to handle tied indirect "
-                              "register inputs yet!");
+            // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+            LLVMContext &Ctx = *DAG.getContext();
+            Ctx.emitError(CS.getInstruction(),  "inline asm not supported yet:"
+                          " don't know how to handle tied "
+                          "indirect register inputs");
           }
+          
           RegsForValue MatchedRegs;
           MatchedRegs.TLI = &TLI;
           MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
@@ -5478,22 +5444,23 @@
           // Use the produced MatchedRegs object to
           MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
                                     Chain, &Flag);
-          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
                                            true, OpInfo.getMatchedOperand(),
                                            DAG, AsmNodeOperands);
           break;
-        } else {
-          assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
-          assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
-                 "Unexpected number of operands");
-          // Add information to the INLINEASM node to know about this input.
-          // See InlineAsm.h isUseOperandTiedToDef.
-          OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
-          AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
-                                                          TLI.getPointerTy()));
-          AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
-          break;
         }
+        
+        assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+        assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+               "Unexpected number of operands");
+        // Add information to the INLINEASM node to know about this input.
+        // See InlineAsm.h isUseOperandTiedToDef.
+        OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+                                                    OpInfo.getMatchedOperand());
+        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+        break;
       }
 
       if (OpInfo.ConstraintType == TargetLowering::C_Other) {
@@ -5503,24 +5470,26 @@
         std::vector<SDValue> Ops;
         TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
                                          hasMemory, Ops, DAG);
-        if (Ops.empty()) {
-          llvm_report_error("Invalid operand for inline asm"
-                            " constraint '" + OpInfo.ConstraintCode + "'!");
-        }
+        if (Ops.empty())
+          report_fatal_error("Invalid operand for inline asm constraint '" +
+                             Twine(OpInfo.ConstraintCode) + "'!");
 
         // Add information to the INLINEASM node to know about this input.
-        unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
+        unsigned ResOpType =
+          InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
         AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
                                                         TLI.getPointerTy()));
         AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
         break;
-      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+      }
+      
+      if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
         assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
         assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
                "Memory operands expect pointer values");
 
         // Add information to the INLINEASM node to know about this input.
-        unsigned ResOpType = 4/*MEM*/ | (1<<3);
+        unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
         AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
                                                         TLI.getPointerTy()));
         AsmNodeOperands.push_back(InOperandVal);
@@ -5535,15 +5504,14 @@
 
       // Copy the input into the appropriate registers.
       if (OpInfo.AssignedRegs.Regs.empty() ||
-          !OpInfo.AssignedRegs.areValueTypesLegal()) {
-        llvm_report_error("Couldn't allocate input reg for"
-                          " constraint '"+ OpInfo.ConstraintCode +"'!");
-      }
+          !OpInfo.AssignedRegs.areValueTypesLegal())
+        report_fatal_error("Couldn't allocate input reg for constraint '" +
+                           Twine(OpInfo.ConstraintCode) + "'!");
 
       OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
                                         Chain, &Flag);
 
-      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
+      OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
                                                DAG, AsmNodeOperands);
       break;
     }
@@ -5551,7 +5519,8 @@
       // Add the clobbered value to the operand list, so that the register
       // allocator is aware that the physreg got clobbered.
       if (!OpInfo.AssignedRegs.Regs.empty())
-        OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
+        OpInfo.AssignedRegs.AddInlineAsmOperands(
+                                            InlineAsm::Kind_RegDefEarlyClobber,
                                                  false, 0, DAG,
                                                  AsmNodeOperands);
       break;
@@ -5559,7 +5528,7 @@
     }
   }
 
-  // Finish up input operands.
+  // Finish up input operands.  Set the input chain and add the flag last.
   AsmNodeOperands[0] = Chain;
   if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
 
@@ -5604,17 +5573,16 @@
       return;
   }
 
-  std::vector<std::pair<SDValue, Value*> > StoresToEmit;
+  std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
 
   // Process indirect outputs, first output all of the flagged copies out of
   // physregs.
   for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
     RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
-    Value *Ptr = IndirectStoresToEmit[i].second;
+    const Value *Ptr = IndirectStoresToEmit[i].second;
     SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
                                              Chain, &Flag);
     StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
-
   }
 
   // Emit the non-flagged stores from the physregs.
@@ -5635,14 +5603,14 @@
   DAG.setRoot(Chain);
 }
 
-void SelectionDAGBuilder::visitVAStart(CallInst &I) {
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
   DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
                           MVT::Other, getRoot(),
                           getValue(I.getOperand(1)),
                           DAG.getSrcValue(I.getOperand(1))));
 }
 
-void SelectionDAGBuilder::visitVAArg(VAArgInst &I) {
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
   SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
                            getRoot(), getValue(I.getOperand(0)),
                            DAG.getSrcValue(I.getOperand(0)));
@@ -5650,14 +5618,14 @@
   DAG.setRoot(V.getValue(1));
 }
 
-void SelectionDAGBuilder::visitVAEnd(CallInst &I) {
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
   DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
                           MVT::Other, getRoot(),
                           getValue(I.getOperand(1)),
                           DAG.getSrcValue(I.getOperand(1))));
 }
 
-void SelectionDAGBuilder::visitVACopy(CallInst &I) {
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
   DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
                           MVT::Other, getRoot(),
                           getValue(I.getOperand(1)),
@@ -5677,7 +5645,8 @@
                             CallingConv::ID CallConv, bool isTailCall,
                             bool isReturnValueUsed,
                             SDValue Callee,
-                            ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
+                            ArgListTy &Args, SelectionDAG &DAG,
+                            DebugLoc dl) const {
   // Handle all of the outgoing arguments.
   SmallVector<ISD::OutputArg, 32> Outs;
   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
@@ -5777,12 +5746,6 @@
          "LowerCall emitted a return value for a tail call!");
   assert((isTailCall || InVals.size() == Ins.size()) &&
          "LowerCall didn't emit the correct number of values!");
-  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
-          assert(InVals[i].getNode() &&
-                 "LowerCall emitted a null value!");
-          assert(Ins[i].VT == InVals[i].getValueType() &&
-                 "LowerCall emitted a value with the wrong type!");
-        });
 
   // For a tail call, the return value is merely live-out and there aren't
   // any nodes in the DAG representing it. Return a special value to
@@ -5793,6 +5756,13 @@
     return std::make_pair(SDValue(), SDValue());
   }
 
+  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+          assert(InVals[i].getNode() &&
+                 "LowerCall emitted a null value!");
+          assert(Ins[i].VT == InVals[i].getValueType() &&
+                 "LowerCall emitted a value with the wrong type!");
+        });
+
   // Collect the legal value parts into potentially illegal values
   // that correspond to the original function's return values.
   ISD::NodeType AssertOp = ISD::DELETED_NODE;
@@ -5827,18 +5797,19 @@
 
 void TargetLowering::LowerOperationWrapper(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
-                                           SelectionDAG &DAG) {
+                                           SelectionDAG &DAG) const {
   SDValue Res = LowerOperation(SDValue(N, 0), DAG);
   if (Res.getNode())
     Results.push_back(Res);
 }
 
-SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   llvm_unreachable("LowerOperation not implemented for this target!");
   return SDValue();
 }
 
-void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
   SDValue Op = getValue(V);
   assert((Op.getOpcode() != ISD::CopyFromReg ||
           cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
@@ -5853,9 +5824,9 @@
 
 #include "llvm/CodeGen/SelectionDAGISel.h"
 
-void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
+void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
   // If this is the entry block, emit arguments.
-  Function &F = *LLVMBB->getParent();
+  const Function &F = *LLVMBB->getParent();
   SelectionDAG &DAG = SDB->DAG;
   SDValue OldRoot = DAG.getRoot();
   DebugLoc dl = SDB->getCurDebugLoc();
@@ -5880,14 +5851,14 @@
     // or one register.
     ISD::ArgFlagsTy Flags;
     Flags.setSRet();
-    EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
+    EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
     ISD::InputArg RetArg(Flags, RegisterVT, true);
     Ins.push_back(RetArg);
   }
 
   // Set up the incoming argument description vector.
   unsigned Idx = 1;
-  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
        I != E; ++I, ++Idx) {
     SmallVector<EVT, 4> ValueVTs;
     ComputeValueVTs(TLI, I->getType(), ValueVTs);
@@ -5989,7 +5960,7 @@
     ++i;
   }
 
-  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
       ++I, ++Idx) {
     SmallVector<SDValue, 4> ArgValues;
     SmallVector<EVT, 4> ValueVTs;
@@ -6016,8 +5987,10 @@
     }
 
     if (!I->use_empty()) {
-      SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
-                                       SDB->getCurDebugLoc());
+      SDValue Res;
+      if (!ArgValues.empty())
+        Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+                                 SDB->getCurDebugLoc());
       SDB->setValue(I, Res);
 
       // If this argument is live outside of the entry block, insert a copy from
@@ -6030,7 +6003,7 @@
 
   // Finally, if the target has anything special to do, allow it to do so.
   // FIXME: this should insert code into the DAG!
-  EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction());
+  EmitFunctionEntryCode();
 }
 
 /// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
@@ -6041,51 +6014,50 @@
 /// the end.
 ///
 void
-SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
-  TerminatorInst *TI = LLVMBB->getTerminator();
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+  const TerminatorInst *TI = LLVMBB->getTerminator();
 
   SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
 
   // Check successor nodes' PHI nodes that expect a constant to be available
   // from this block.
   for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
-    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    const BasicBlock *SuccBB = TI->getSuccessor(succ);
     if (!isa<PHINode>(SuccBB->begin())) continue;
-    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
 
     // If this terminator has multiple identical successors (common for
     // switches), only handle each succ once.
     if (!SuccsHandled.insert(SuccMBB)) continue;
 
     MachineBasicBlock::iterator MBBI = SuccMBB->begin();
-    PHINode *PN;
 
     // At this point we know that there is a 1-1 correspondence between LLVM PHI
     // nodes and Machine PHI nodes, but the incoming operands have not been
     // emitted yet.
-    for (BasicBlock::iterator I = SuccBB->begin();
-         (PN = dyn_cast<PHINode>(I)); ++I) {
+    for (BasicBlock::const_iterator I = SuccBB->begin();
+         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
       // Ignore dead phi's.
       if (PN->use_empty()) continue;
 
       unsigned Reg;
-      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
 
-      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
-        unsigned &RegOut = SDB->ConstantsOut[C];
+      if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = ConstantsOut[C];
         if (RegOut == 0) {
-          RegOut = FuncInfo->CreateRegForValue(C);
-          SDB->CopyValueToVirtualRegister(C, RegOut);
+          RegOut = FuncInfo.CreateRegForValue(C);
+          CopyValueToVirtualRegister(C, RegOut);
         }
         Reg = RegOut;
       } else {
-        Reg = FuncInfo->ValueMap[PHIOp];
+        Reg = FuncInfo.ValueMap[PHIOp];
         if (Reg == 0) {
           assert(isa<AllocaInst>(PHIOp) &&
-                 FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                  "Didn't codegen value into a register!??");
-          Reg = FuncInfo->CreateRegForValue(PHIOp);
-          SDB->CopyValueToVirtualRegister(PHIOp, Reg);
+          Reg = FuncInfo.CreateRegForValue(PHIOp);
+          CopyValueToVirtualRegister(PHIOp, Reg);
         }
       }
 
@@ -6095,77 +6067,12 @@
       ComputeValueVTs(TLI, PN->getType(), ValueVTs);
       for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
         EVT VT = ValueVTs[vti];
-        unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
         for (unsigned i = 0, e = NumRegisters; i != e; ++i)
-          SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+          FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
         Reg += NumRegisters;
       }
     }
   }
-  SDB->ConstantsOut.clear();
-}
-
-/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
-/// supports legal types, and it emits MachineInstrs directly instead of
-/// creating SelectionDAG nodes.
-///
-bool
-SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
-                                                      FastISel *F) {
-  TerminatorInst *TI = LLVMBB->getTerminator();
-
-  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
-  unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size();
-
-  // Check successor nodes' PHI nodes that expect a constant to be available
-  // from this block.
-  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
-    BasicBlock *SuccBB = TI->getSuccessor(succ);
-    if (!isa<PHINode>(SuccBB->begin())) continue;
-    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
-
-    // If this terminator has multiple identical successors (common for
-    // switches), only handle each succ once.
-    if (!SuccsHandled.insert(SuccMBB)) continue;
-
-    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
-    PHINode *PN;
-
-    // At this point we know that there is a 1-1 correspondence between LLVM PHI
-    // nodes and Machine PHI nodes, but the incoming operands have not been
-    // emitted yet.
-    for (BasicBlock::iterator I = SuccBB->begin();
-         (PN = dyn_cast<PHINode>(I)); ++I) {
-      // Ignore dead phi's.
-      if (PN->use_empty()) continue;
-
-      // Only handle legal types. Two interesting things to note here. First,
-      // by bailing out early, we may leave behind some dead instructions,
-      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
-      // own moves. Second, this check is necessary becuase FastISel doesn't
-      // use CreateRegForValue to create registers, so it always creates
-      // exactly one register for each non-void instruction.
-      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
-      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
-        // Promote MVT::i1.
-        if (VT == MVT::i1)
-          VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
-        else {
-          SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
-          return false;
-        }
-      }
-
-      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
-
-      unsigned Reg = F->getRegForValue(PHIOp);
-      if (Reg == 0) {
-        SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
-        return false;
-      }
-      SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
-    }
-  }
-
-  return true;
+  ConstantsOut.clear();
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index bc4b33d..d9495fb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -56,7 +56,6 @@
 class Instruction;
 class LoadInst;
 class MachineBasicBlock;
-class MachineFunction;
 class MachineInstr;
 class MachineRegisterInfo;
 class PHINode;
@@ -81,11 +80,8 @@
 //===----------------------------------------------------------------------===//
 /// SelectionDAGBuilder - This is the common target-independent lowering
 /// implementation that is parameterized by a TargetLowering object.
-/// Also, targets can overload any lowering method.
 ///
 class SelectionDAGBuilder {
-  MachineBasicBlock *CurMBB;
-
   /// CurDebugLoc - current file + line number.  Changes as we build the DAG.
   DebugLoc CurDebugLoc;
 
@@ -144,15 +140,16 @@
   /// CaseRec - A struct with ctor used in lowering switches to a binary tree
   /// of conditional branches.
   struct CaseRec {
-    CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+    CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
+            CaseRange r) :
     CaseBB(bb), LT(lt), GE(ge), Range(r) {}
 
     /// CaseBB - The MBB in which to emit the compare and branch
     MachineBasicBlock *CaseBB;
     /// LT, GE - If nonzero, we know the current case value must be less-than or
     /// greater-than-or-equal-to these Constants.
-    Constant *LT;
-    Constant *GE;
+    const Constant *LT;
+    const Constant *GE;
     /// Range - A pair of iterators representing the range of case values to be
     /// processed at this point in the binary search tree.
     CaseRange Range;
@@ -183,7 +180,8 @@
   /// SelectionDAGBuilder and SDISel for the code generation of additional basic
   /// blocks needed by multi-case switch statements.
   struct CaseBlock {
-    CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
+    CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+              const Value *cmpmiddle,
               MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
               MachineBasicBlock *me)
       : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
@@ -193,7 +191,7 @@
     // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
     // Emit by default LHS op RHS. MHS is used for range comparisons:
     // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
-    Value *CmpLHS, *CmpMHS, *CmpRHS;
+    const Value *CmpLHS, *CmpMHS, *CmpRHS;
     // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
     MachineBasicBlock *TrueBB, *FalseBB;
     // ThisBB - the block into which to emit the code for the setcc and branches
@@ -215,12 +213,12 @@
     MachineBasicBlock *Default;
   };
   struct JumpTableHeader {
-    JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H,
+    JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
                     bool E = false):
       First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
     APInt First;
     APInt Last;
-    Value *SValue;
+    const Value *SValue;
     MachineBasicBlock *HeaderBB;
     bool Emitted;
   };
@@ -237,7 +235,7 @@
   typedef SmallVector<BitTestCase, 3> BitTestInfo;
 
   struct BitTestBlock {
-    BitTestBlock(APInt F, APInt R, Value* SV,
+    BitTestBlock(APInt F, APInt R, const Value* SV,
                  unsigned Rg, bool E,
                  MachineBasicBlock* P, MachineBasicBlock* D,
                  const BitTestInfo& C):
@@ -245,7 +243,7 @@
       Parent(P), Default(D), Cases(C) { }
     APInt First;
     APInt Range;
-    Value  *SValue;
+    const Value *SValue;
     unsigned Reg;
     bool Emitted;
     MachineBasicBlock *Parent;
@@ -257,7 +255,8 @@
   // TLI - This is information that describes the available target features we
   // need for lowering.  This indicates when operations are unavailable,
   // implemented with a libcall, etc.
-  TargetLowering &TLI;
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
   SelectionDAG &DAG;
   const TargetData *TD;
   AliasAnalysis *AA;
@@ -272,17 +271,13 @@
   /// SwitchInst code generation information.
   std::vector<BitTestBlock> BitTestCases;
 
-  /// PHINodesToUpdate - A list of phi instructions whose operand list will
-  /// be updated after processing the current basic block.
-  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
-
   /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to
   /// scheduler custom lowering), track the change here.
   DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping;
 
   // Emit PHI-node-operand constants only once even if used by multiple
   // PHI nodes.
-  DenseMap<Constant*, unsigned> ConstantsOut;
+  DenseMap<const Constant *, unsigned> ConstantsOut;
 
   /// FuncInfo - Information about the function as a whole.
   ///
@@ -303,18 +298,16 @@
 
   LLVMContext *Context;
 
-  SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli,
-                      FunctionLoweringInfo &funcinfo,
+  SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
                       CodeGenOpt::Level ol)
-    : CurDebugLoc(DebugLoc::getUnknownLoc()), SDNodeOrder(0),
-      TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
-      HasTailCall(false),
-      Context(dag.getContext()) {
+    : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+      DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+      HasTailCall(false), Context(dag.getContext()) {
   }
 
   void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
 
-  /// clear - Clear out the curret SelectionDAG and the associated
+  /// clear - Clear out the current SelectionDAG and the associated
   /// state and prepare this SelectionDAGBuilder object to be used
   /// for a new block. This doesn't clear out information about
   /// additional blocks that are needed to complete switch lowering
@@ -336,22 +329,19 @@
   SDValue getControlRoot();
 
   DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
-  void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; }
 
   unsigned getSDNodeOrder() const { return SDNodeOrder; }
 
-  void CopyValueToVirtualRegister(Value *V, unsigned Reg);
+  void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
 
   /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten
   /// from how the code appeared in the source. The ordering is used by the
   /// scheduler to effectively turn off scheduling.
   void AssignOrderingToNode(const SDNode *Node);
 
-  void visit(Instruction &I);
+  void visit(const Instruction &I);
 
-  void visit(unsigned Opcode, User &I);
-
-  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+  void visit(unsigned Opcode, const User &I);
 
   SDValue getValue(const Value *V);
 
@@ -365,136 +355,146 @@
                             std::set<unsigned> &OutputRegs, 
                             std::set<unsigned> &InputRegs);
 
-  void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+  void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
                             MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
-                            unsigned Opc);
-  void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB,
+                            MachineBasicBlock *SwitchBB, unsigned Opc);
+  void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
-                                    MachineBasicBlock *CurBB);
+                                    MachineBasicBlock *CurBB,
+                                    MachineBasicBlock *SwitchBB);
   bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
-  bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
-  void CopyToExportRegsIfNeeded(Value *V);
-  void ExportFromCurrentBlock(Value *V);
-  void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall,
+  bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+  void CopyToExportRegsIfNeeded(const Value *V);
+  void ExportFromCurrentBlock(const Value *V);
+  void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
                    MachineBasicBlock *LandingPad = NULL);
 
 private:
   // Terminator instructions.
-  void visitRet(ReturnInst &I);
-  void visitBr(BranchInst &I);
-  void visitSwitch(SwitchInst &I);
-  void visitIndirectBr(IndirectBrInst &I);
-  void visitUnreachable(UnreachableInst &I) { /* noop */ }
+  void visitRet(const ReturnInst &I);
+  void visitBr(const BranchInst &I);
+  void visitSwitch(const SwitchInst &I);
+  void visitIndirectBr(const IndirectBrInst &I);
+  void visitUnreachable(const UnreachableInst &I) { /* noop */ }
 
   // Helpers for visitSwitch
   bool handleSmallSwitchRange(CaseRec& CR,
                               CaseRecVector& WorkList,
-                              Value* SV,
-                              MachineBasicBlock* Default);
+                              const Value* SV,
+                              MachineBasicBlock* Default,
+                              MachineBasicBlock *SwitchBB);
   bool handleJTSwitchCase(CaseRec& CR,
                           CaseRecVector& WorkList,
-                          Value* SV,
-                          MachineBasicBlock* Default);
+                          const Value* SV,
+                          MachineBasicBlock* Default,
+                          MachineBasicBlock *SwitchBB);
   bool handleBTSplitSwitchCase(CaseRec& CR,
                                CaseRecVector& WorkList,
-                               Value* SV,
-                               MachineBasicBlock* Default);
+                               const Value* SV,
+                               MachineBasicBlock* Default,
+                               MachineBasicBlock *SwitchBB);
   bool handleBitTestsSwitchCase(CaseRec& CR,
                                 CaseRecVector& WorkList,
-                                Value* SV,
-                                MachineBasicBlock* Default);  
+                                const Value* SV,
+                                MachineBasicBlock* Default,
+                                MachineBasicBlock *SwitchBB);
 public:
-  void visitSwitchCase(CaseBlock &CB);
-  void visitBitTestHeader(BitTestBlock &B);
+  void visitSwitchCase(CaseBlock &CB,
+                       MachineBasicBlock *SwitchBB);
+  void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
   void visitBitTestCase(MachineBasicBlock* NextMBB,
                         unsigned Reg,
-                        BitTestCase &B);
+                        BitTestCase &B,
+                        MachineBasicBlock *SwitchBB);
   void visitJumpTable(JumpTable &JT);
-  void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH);
+  void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+                            MachineBasicBlock *SwitchBB);
   
 private:
   // These all get lowered before this pass.
-  void visitInvoke(InvokeInst &I);
-  void visitUnwind(UnwindInst &I);
+  void visitInvoke(const InvokeInst &I);
+  void visitUnwind(const UnwindInst &I);
 
-  void visitBinary(User &I, unsigned OpCode);
-  void visitShift(User &I, unsigned Opcode);
-  void visitAdd(User &I)  { visitBinary(I, ISD::ADD); }
-  void visitFAdd(User &I) { visitBinary(I, ISD::FADD); }
-  void visitSub(User &I)  { visitBinary(I, ISD::SUB); }
-  void visitFSub(User &I);
-  void visitMul(User &I)  { visitBinary(I, ISD::MUL); }
-  void visitFMul(User &I) { visitBinary(I, ISD::FMUL); }
-  void visitURem(User &I) { visitBinary(I, ISD::UREM); }
-  void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
-  void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
-  void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
-  void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
-  void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
-  void visitAnd (User &I) { visitBinary(I, ISD::AND); }
-  void visitOr  (User &I) { visitBinary(I, ISD::OR); }
-  void visitXor (User &I) { visitBinary(I, ISD::XOR); }
-  void visitShl (User &I) { visitShift(I, ISD::SHL); }
-  void visitLShr(User &I) { visitShift(I, ISD::SRL); }
-  void visitAShr(User &I) { visitShift(I, ISD::SRA); }
-  void visitICmp(User &I);
-  void visitFCmp(User &I);
+  void visitBinary(const User &I, unsigned OpCode);
+  void visitShift(const User &I, unsigned Opcode);
+  void visitAdd(const User &I)  { visitBinary(I, ISD::ADD); }
+  void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+  void visitSub(const User &I)  { visitBinary(I, ISD::SUB); }
+  void visitFSub(const User &I);
+  void visitMul(const User &I)  { visitBinary(I, ISD::MUL); }
+  void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+  void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+  void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+  void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+  void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+  void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); }
+  void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+  void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+  void visitOr  (const User &I) { visitBinary(I, ISD::OR); }
+  void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+  void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+  void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+  void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+  void visitICmp(const User &I);
+  void visitFCmp(const User &I);
   // Visit the conversion instructions
-  void visitTrunc(User &I);
-  void visitZExt(User &I);
-  void visitSExt(User &I);
-  void visitFPTrunc(User &I);
-  void visitFPExt(User &I);
-  void visitFPToUI(User &I);
-  void visitFPToSI(User &I);
-  void visitUIToFP(User &I);
-  void visitSIToFP(User &I);
-  void visitPtrToInt(User &I);
-  void visitIntToPtr(User &I);
-  void visitBitCast(User &I);
+  void visitTrunc(const User &I);
+  void visitZExt(const User &I);
+  void visitSExt(const User &I);
+  void visitFPTrunc(const User &I);
+  void visitFPExt(const User &I);
+  void visitFPToUI(const User &I);
+  void visitFPToSI(const User &I);
+  void visitUIToFP(const User &I);
+  void visitSIToFP(const User &I);
+  void visitPtrToInt(const User &I);
+  void visitIntToPtr(const User &I);
+  void visitBitCast(const User &I);
 
-  void visitExtractElement(User &I);
-  void visitInsertElement(User &I);
-  void visitShuffleVector(User &I);
+  void visitExtractElement(const User &I);
+  void visitInsertElement(const User &I);
+  void visitShuffleVector(const User &I);
 
-  void visitExtractValue(ExtractValueInst &I);
-  void visitInsertValue(InsertValueInst &I);
+  void visitExtractValue(const ExtractValueInst &I);
+  void visitInsertValue(const InsertValueInst &I);
 
-  void visitGetElementPtr(User &I);
-  void visitSelect(User &I);
+  void visitGetElementPtr(const User &I);
+  void visitSelect(const User &I);
 
-  void visitAlloca(AllocaInst &I);
-  void visitLoad(LoadInst &I);
-  void visitStore(StoreInst &I);
-  void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
-  void visitCall(CallInst &I);
-  bool visitMemCmpCall(CallInst &I);
+  void visitAlloca(const AllocaInst &I);
+  void visitLoad(const LoadInst &I);
+  void visitStore(const StoreInst &I);
+  void visitPHI(const PHINode &I);
+  void visitCall(const CallInst &I);
+  bool visitMemCmpCall(const CallInst &I);
   
-  void visitInlineAsm(CallSite CS);
-  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
-  void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
+  void visitInlineAsm(ImmutableCallSite CS);
+  const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+  void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
 
-  void visitPow(CallInst &I);
-  void visitExp2(CallInst &I);
-  void visitExp(CallInst &I);
-  void visitLog(CallInst &I);
-  void visitLog2(CallInst &I);
-  void visitLog10(CallInst &I);
+  void visitPow(const CallInst &I);
+  void visitExp2(const CallInst &I);
+  void visitExp(const CallInst &I);
+  void visitLog(const CallInst &I);
+  void visitLog2(const CallInst &I);
+  void visitLog10(const CallInst &I);
 
-  void visitVAStart(CallInst &I);
-  void visitVAArg(VAArgInst &I);
-  void visitVAEnd(CallInst &I);
-  void visitVACopy(CallInst &I);
+  void visitVAStart(const CallInst &I);
+  void visitVAArg(const VAArgInst &I);
+  void visitVAEnd(const CallInst &I);
+  void visitVACopy(const CallInst &I);
 
-  void visitUserOp1(Instruction &I) {
+  void visitUserOp1(const Instruction &I) {
     llvm_unreachable("UserOp1 should not exist at instruction selection time!");
   }
-  void visitUserOp2(Instruction &I) {
+  void visitUserOp2(const Instruction &I) {
     llvm_unreachable("UserOp2 should not exist at instruction selection time!");
   }
   
-  const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
-  const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
+  const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op);
+  const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
+
+  void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
 };
 
 } // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 05f9f1f..2cdd1cc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -19,10 +19,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Constants.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
@@ -32,19 +29,13 @@
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
@@ -53,7 +44,6 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/Statistic.h"
@@ -61,6 +51,7 @@
 using namespace llvm;
 
 STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
 
 static cl::opt<bool>
 EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
@@ -69,10 +60,6 @@
 static cl::opt<bool>
 EnableFastISelAbort("fast-isel-abort", cl::Hidden,
           cl::desc("Enable abort calls when \"fast\" instruction fails"));
-static cl::opt<bool>
-SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
-                  cl::desc("Schedule copies of livein registers"),
-                  cl::init(false));
 
 #ifndef NDEBUG
 static cl::opt<bool>
@@ -173,115 +160,15 @@
   return 0;
 }
 
-/// EmitLiveInCopy - Emit a copy for a live in physical register. If the
-/// physical register has only a single copy use, then coalesced the copy
-/// if possible.
-static void EmitLiveInCopy(MachineBasicBlock *MBB,
-                           MachineBasicBlock::iterator &InsertPos,
-                           unsigned VirtReg, unsigned PhysReg,
-                           const TargetRegisterClass *RC,
-                           DenseMap<MachineInstr*, unsigned> &CopyRegMap,
-                           const MachineRegisterInfo &MRI,
-                           const TargetRegisterInfo &TRI,
-                           const TargetInstrInfo &TII) {
-  unsigned NumUses = 0;
-  MachineInstr *UseMI = NULL;
-  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
-         UE = MRI.use_end(); UI != UE; ++UI) {
-    UseMI = &*UI;
-    if (++NumUses > 1)
-      break;
-  }
-
-  // If the number of uses is not one, or the use is not a move instruction,
-  // don't coalesce. Also, only coalesce away a virtual register to virtual
-  // register copy.
-  bool Coalesced = false;
-  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-  if (NumUses == 1 &&
-      TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
-      TargetRegisterInfo::isVirtualRegister(DstReg)) {
-    VirtReg = DstReg;
-    Coalesced = true;
-  }
-
-  // Now find an ideal location to insert the copy.
-  MachineBasicBlock::iterator Pos = InsertPos;
-  while (Pos != MBB->begin()) {
-    MachineInstr *PrevMI = prior(Pos);
-    DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
-    // copyRegToReg might emit multiple instructions to do a copy.
-    unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
-    if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
-      // This is what the BB looks like right now:
-      // r1024 = mov r0
-      // ...
-      // r1    = mov r1024
-      //
-      // We want to insert "r1025 = mov r1". Inserting this copy below the
-      // move to r1024 makes it impossible for that move to be coalesced.
-      //
-      // r1025 = mov r1
-      // r1024 = mov r0
-      // ...
-      // r1    = mov 1024
-      // r2    = mov 1025
-      break; // Woot! Found a good location.
-    --Pos;
-  }
-
-  bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
-  assert(Emitted && "Unable to issue a live-in copy instruction!\n");
-  (void) Emitted;
-
-  CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
-  if (Coalesced) {
-    if (&*InsertPos == UseMI) ++InsertPos;
-    MBB->erase(UseMI);
-  }
-}
-
-/// EmitLiveInCopies - If this is the first basic block in the function,
-/// and if it has live ins that need to be copied into vregs, emit the
-/// copies into the block.
-static void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
-                             const MachineRegisterInfo &MRI,
-                             const TargetRegisterInfo &TRI,
-                             const TargetInstrInfo &TII) {
-  if (SchedLiveInCopies) {
-    // Emit the copies at a heuristically-determined location in the block.
-    DenseMap<MachineInstr*, unsigned> CopyRegMap;
-    MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
-    for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
-           E = MRI.livein_end(); LI != E; ++LI)
-      if (LI->second) {
-        const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
-        EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
-                       RC, CopyRegMap, MRI, TRI, TII);
-      }
-  } else {
-    // Emit the copies into the top of the block.
-    for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
-           E = MRI.livein_end(); LI != E; ++LI)
-      if (LI->second) {
-        const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
-        bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
-                                        LI->second, LI->first, RC, RC);
-        assert(Emitted && "Unable to issue a live-in copy instruction!\n");
-        (void) Emitted;
-      }
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // SelectionDAGISel code
 //===----------------------------------------------------------------------===//
 
-SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) :
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
   MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
   FuncInfo(new FunctionLoweringInfo(TLI)),
-  CurDAG(new SelectionDAG(TLI, *FuncInfo)),
-  SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)),
+  CurDAG(new SelectionDAG(tm, *FuncInfo)),
+  SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
   GFI(),
   OptLevel(OL),
   DAGSize(0)
@@ -293,152 +180,70 @@
   delete FuncInfo;
 }
 
-unsigned SelectionDAGISel::MakeReg(EVT VT) {
-  return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
-}
-
 void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<AliasAnalysis>();
   AU.addPreserved<AliasAnalysis>();
   AU.addRequired<GCModuleInfo>();
   AU.addPreserved<GCModuleInfo>();
-  AU.addRequired<DwarfWriter>();
-  AU.addPreserved<DwarfWriter>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
 bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
-  Function &Fn = *mf.getFunction();
-
   // Do some sanity-checking on the command-line options.
   assert((!EnableFastISelVerbose || EnableFastISel) &&
          "-fast-isel-verbose requires -fast-isel");
   assert((!EnableFastISelAbort || EnableFastISel) &&
          "-fast-isel-abort requires -fast-isel");
 
-  // Get alias analysis for load/store combining.
-  AA = &getAnalysis<AliasAnalysis>();
-
-  MF = &mf;
+  const Function &Fn = *mf.getFunction();
   const TargetInstrInfo &TII = *TM.getInstrInfo();
   const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
 
-  if (Fn.hasGC())
-    GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn);
-  else
-    GFI = 0;
+  MF = &mf;
   RegInfo = &MF->getRegInfo();
+  AA = &getAnalysis<AliasAnalysis>();
+  GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+
   DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
 
-  MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-  DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
-  CurDAG->init(*MF, MMI, DW);
+  CurDAG->init(*MF);
   FuncInfo->set(Fn, *MF, EnableFastISel);
   SDB->init(GFI, *AA);
 
-  for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
-    if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
-      // Mark landing pad.
-      FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+  SelectAllBasicBlocks(Fn);
 
-  SelectAllBasicBlocks(Fn, *MF, MMI, DW, TII);
+  // Release function-specific state. SDB and CurDAG are already cleared
+  // at this point.
+  FuncInfo->clear();
 
   // If the first basic block in the function has live ins that need to be
   // copied into vregs, emit the copies into the top of the block before
   // emitting the code for the block.
-  EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII);
-
-  // Add function live-ins to entry block live-in set.
-  for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(),
-         E = RegInfo->livein_end(); I != E; ++I)
-    MF->begin()->addLiveIn(I->first);
-
-#ifndef NDEBUG
-  assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() &&
-         "Not all catch info was assigned to a landing pad!");
-#endif
-
-  FuncInfo->clear();
+  RegInfo->EmitLiveInCopies(MF->begin(), TRI, TII);
 
   return true;
 }
 
-/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is
-/// attached with this instruction.
-static void SetDebugLoc(unsigned MDDbgKind, Instruction *I,
-                        SelectionDAGBuilder *SDB,
-                        FastISel *FastIS, MachineFunction *MF) {
-  if (isa<DbgInfoIntrinsic>(I)) return;
-  
-  if (MDNode *Dbg = I->getMetadata(MDDbgKind)) {
-    DILocation DILoc(Dbg);
-    DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
-
-    SDB->setCurDebugLoc(Loc);
-
-    if (FastIS)
-      FastIS->setCurDebugLoc(Loc);
-
-    // If the function doesn't have a default debug location yet, set
-    // it. This is kind of a hack.
-    if (MF->getDefaultDebugLoc().isUnknown())
-      MF->setDefaultDebugLoc(Loc);
-  }
-}
-
-/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown.
-static void ResetDebugLoc(SelectionDAGBuilder *SDB, FastISel *FastIS) {
-  SDB->setCurDebugLoc(DebugLoc::getUnknownLoc());
-  if (FastIS)
-    FastIS->setCurDebugLoc(DebugLoc::getUnknownLoc());
-}
-
-void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
-                                        BasicBlock::iterator Begin,
-                                        BasicBlock::iterator End,
-                                        bool &HadTailCall) {
-  SDB->setCurrentBasicBlock(BB);
-  unsigned MDDbgKind = LLVMBB->getContext().getMDKindID("dbg");
-
+MachineBasicBlock *
+SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB,
+                                   const BasicBlock *LLVMBB,
+                                   BasicBlock::const_iterator Begin,
+                                   BasicBlock::const_iterator End,
+                                   bool &HadTailCall) {
   // Lower all of the non-terminator instructions. If a call is emitted
-  // as a tail call, cease emitting nodes for this block.
-  for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
-    SetDebugLoc(MDDbgKind, I, SDB, 0, MF);
-
-    if (!isa<TerminatorInst>(I)) {
-      SDB->visit(*I);
-
-      // Set the current debug location back to "unknown" so that it doesn't
-      // spuriously apply to subsequent instructions.
-      ResetDebugLoc(SDB, 0);
-    }
-  }
-
-  if (!SDB->HasTailCall) {
-    // Ensure that all instructions which are used outside of their defining
-    // blocks are available as virtual registers.  Invoke is handled elsewhere.
-    for (BasicBlock::iterator I = Begin; I != End; ++I)
-      if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
-        SDB->CopyToExportRegsIfNeeded(I);
-
-    // Handle PHI nodes in successor blocks.
-    if (End == LLVMBB->end()) {
-      HandlePHINodesInSuccessorBlocks(LLVMBB);
-
-      // Lower the terminator after the copies are emitted.
-      SetDebugLoc(MDDbgKind, LLVMBB->getTerminator(), SDB, 0, MF);
-      SDB->visit(*LLVMBB->getTerminator());
-      ResetDebugLoc(SDB, 0);
-    }
-  }
+  // as a tail call, cease emitting nodes for this block. Terminators
+  // are handled below.
+  for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
+    SDB->visit(*I);
 
   // Make sure the root of the DAG is up-to-date.
   CurDAG->setRoot(SDB->getControlRoot());
 
   // Final step, emit the lowered DAG as machine code.
-  CodeGenAndEmitDAG();
+  BB = CodeGenAndEmitDAG(BB);
   HadTailCall = SDB->HasTailCall;
   SDB->clear();
+  return BB;
 }
 
 namespace {
@@ -446,12 +251,25 @@
 /// nodes from the worklist.
 class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
   SmallVector<SDNode*, 128> &Worklist;
+  SmallPtrSet<SDNode*, 128> &InWorklist;
 public:
-  SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl) : Worklist(wl) {}
+  SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl,
+                       SmallPtrSet<SDNode*, 128> &inwl)
+    : Worklist(wl), InWorklist(inwl) {}
 
+  void RemoveFromWorklist(SDNode *N) {
+    if (!InWorklist.erase(N)) return;
+    
+    SmallVector<SDNode*, 128>::iterator I =
+    std::find(Worklist.begin(), Worklist.end(), N);
+    assert(I != Worklist.end() && "Not in worklist");
+    
+    *I = Worklist.back();
+    Worklist.pop_back();
+  }
+  
   virtual void NodeDeleted(SDNode *N, SDNode *E) {
-    Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
-                   Worklist.end());
+    RemoveFromWorklist(N);
   }
 
   virtual void NodeUpdated(SDNode *N) {
@@ -480,70 +298,79 @@
 /// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
 void SelectionDAGISel::ShrinkDemandedOps() {
   SmallVector<SDNode*, 128> Worklist;
+  SmallPtrSet<SDNode*, 128> InWorklist;
 
   // Add all the dag nodes to the worklist.
   Worklist.reserve(CurDAG->allnodes_size());
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
-       E = CurDAG->allnodes_end(); I != E; ++I)
+       E = CurDAG->allnodes_end(); I != E; ++I) {
     Worklist.push_back(I);
+    InWorklist.insert(I);
+  }
 
-  APInt Mask;
-  APInt KnownZero;
-  APInt KnownOne;
-
-  TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+  TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true);
   while (!Worklist.empty()) {
     SDNode *N = Worklist.pop_back_val();
+    InWorklist.erase(N);
 
     if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+      // Deleting this node may make its operands dead, add them to the worklist
+      // if they aren't already there.
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        if (InWorklist.insert(N->getOperand(i).getNode()))
+          Worklist.push_back(N->getOperand(i).getNode());
+      
       CurDAG->DeleteNode(N);
       continue;
     }
 
     // Run ShrinkDemandedOp on scalar binary operations.
-    if (N->getNumValues() == 1 &&
-        N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
-      unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
-      APInt Demanded = APInt::getAllOnesValue(BitWidth);
-      APInt KnownZero, KnownOne;
-      if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
-                                   KnownZero, KnownOne, TLO) ||
-          (N->getOpcode() == ISD::TRUNCATE &&
-           TrivialTruncElim(SDValue(N, 0), TLO))) {
-        // Revisit the node.
-        Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
-                       Worklist.end());
-        Worklist.push_back(N);
+    if (N->getNumValues() != 1 ||
+        !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger())
+      continue;
+    
+    unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+    APInt Demanded = APInt::getAllOnesValue(BitWidth);
+    APInt KnownZero, KnownOne;
+    if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+                                  KnownZero, KnownOne, TLO) &&
+        (N->getOpcode() != ISD::TRUNCATE ||
+         !TrivialTruncElim(SDValue(N, 0), TLO)))
+      continue;
+    
+    // Revisit the node.
+    assert(!InWorklist.count(N) && "Already in worklist");
+    Worklist.push_back(N);
+    InWorklist.insert(N);
 
-        // Replace the old value with the new one.
-        DEBUG(errs() << "\nReplacing "; 
-              TLO.Old.getNode()->dump(CurDAG);
-              errs() << "\nWith: ";
-              TLO.New.getNode()->dump(CurDAG);
-              errs() << '\n');
+    // Replace the old value with the new one.
+    DEBUG(errs() << "\nShrinkDemandedOps replacing "; 
+          TLO.Old.getNode()->dump(CurDAG);
+          errs() << "\nWith: ";
+          TLO.New.getNode()->dump(CurDAG);
+          errs() << '\n');
 
-        Worklist.push_back(TLO.New.getNode());
+    if (InWorklist.insert(TLO.New.getNode()))
+      Worklist.push_back(TLO.New.getNode());
 
-        SDOPsWorkListRemover DeadNodes(Worklist);
-        CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+    SDOPsWorkListRemover DeadNodes(Worklist, InWorklist);
+    CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
 
-        if (TLO.Old.getNode()->use_empty()) {
-          for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
-               i != e; ++i) {
-            SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); 
-            if (OpNode->hasOneUse()) {
-              Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
-                                         OpNode), Worklist.end());
-              Worklist.push_back(OpNode);
-            }
-          }
-
-          Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
-                                     TLO.Old.getNode()), Worklist.end());
-          CurDAG->DeleteNode(TLO.Old.getNode());
-        }
+    if (!TLO.Old.getNode()->use_empty()) continue;
+        
+    for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+         i != e; ++i) {
+      SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); 
+      if (OpNode->hasOneUse()) {
+        // Add OpNode to the end of the list to revisit.
+        DeadNodes.RemoveFromWorklist(OpNode);
+        Worklist.push_back(OpNode);
+        InWorklist.insert(OpNode);
       }
     }
+
+    DeadNodes.RemoveFromWorklist(TLO.Old.getNode());
+    CurDAG->DeleteNode(TLO.Old.getNode());
   }
 }
 
@@ -601,7 +428,7 @@
   } while (!Worklist.empty());
 }
 
-void SelectionDAGISel::CodeGenAndEmitDAG() {
+MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
   std::string GroupName;
   if (TimePassesIsEnabled)
     GroupName = "Instruction Selection and Scheduling";
@@ -715,13 +542,13 @@
   DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
   DEBUG(CurDAG->dump());
 
-  if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
-
   if (OptLevel != CodeGenOpt::None) {
     ShrinkDemandedOps();
     ComputeLiveOutVRegInfo();
   }
 
+  if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
   // Third, instruction select all of the operations to machine code, adding the
   // code to the MachineBasicBlock.
   if (TimePassesIsEnabled) {
@@ -764,8 +591,7 @@
     delete Scheduler;
   }
 
-  DEBUG(dbgs() << "Selected machine code:\n");
-  DEBUG(BB->dump());
+  return BB;
 }
 
 void SelectionDAGISel::DoInstructionSelection() {
@@ -822,39 +648,72 @@
   DEBUG(errs() << "===== Instruction selection ends:\n");
 
   PostprocessISelDAG();
-  
-  // FIXME: This shouldn't be needed, remove it.
-  CurDAG->RemoveDeadNodes();
 }
 
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) {
+  // Add a label to mark the beginning of the landing pad.  Deletion of the
+  // landing pad can thus be detected via the MachineModuleInfo.
+  MCSymbol *Label = MF->getMMI().addLandingPad(BB);
 
-void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
-                                            MachineFunction &MF,
-                                            MachineModuleInfo *MMI,
-                                            DwarfWriter *DW,
-                                            const TargetInstrInfo &TII) {
+  const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+  BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label);
+
+  // Mark exception register as live in.
+  unsigned Reg = TLI.getExceptionAddressRegister();
+  if (Reg) BB->addLiveIn(Reg);
+
+  // Mark exception selector register as live in.
+  Reg = TLI.getExceptionSelectorRegister();
+  if (Reg) BB->addLiveIn(Reg);
+
+  // FIXME: Hack around an exception handling flaw (PR1508): the personality
+  // function and list of typeids logically belong to the invoke (or, if you
+  // like, the basic block containing the invoke), and need to be associated
+  // with it in the dwarf exception handling tables.  Currently however the
+  // information is provided by an intrinsic (eh.selector) that can be moved
+  // to unexpected places by the optimizers: if the unwind edge is critical,
+  // then breaking it can result in the intrinsics being in the successor of
+  // the landing pad, not the landing pad itself.  This results
+  // in exceptions not being caught because no typeids are associated with
+  // the invoke.  This may not be the only way things can go wrong, but it
+  // is the only way we try to work around for the moment.
+  const BasicBlock *LLVMBB = BB->getBasicBlock();
+  const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+  if (Br && Br->isUnconditional()) { // Critical edge?
+    BasicBlock::const_iterator I, E;
+    for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+      if (isa<EHSelectorInst>(I))
+        break;
+
+    if (I == E)
+      // No catch info found - try to extract some from the successor.
+      CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
+  }
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   // Initialize the Fast-ISel state, if needed.
   FastISel *FastIS = 0;
   if (EnableFastISel)
-    FastIS = TLI.createFastISel(MF, MMI, DW,
-                                FuncInfo->ValueMap,
-                                FuncInfo->MBBMap,
-                                FuncInfo->StaticAllocaMap
+    FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap,
+                                FuncInfo->StaticAllocaMap,
+                                FuncInfo->PHINodesToUpdate
 #ifndef NDEBUG
                                 , FuncInfo->CatchInfoLost
 #endif
                                 );
 
-  unsigned MDDbgKind = Fn.getContext().getMDKindID("dbg");
-
   // Iterate over all basic blocks in the function.
-  for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
-    BasicBlock *LLVMBB = &*I;
-    BB = FuncInfo->MBBMap[LLVMBB];
+  for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+    const BasicBlock *LLVMBB = &*I;
+    MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB];
 
-    BasicBlock::iterator const Begin = LLVMBB->begin();
-    BasicBlock::iterator const End = LLVMBB->end();
-    BasicBlock::iterator BI = Begin;
+    BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+    BasicBlock::const_iterator const End = LLVMBB->end();
+    BasicBlock::const_iterator BI = Begin;
 
     // Lower any arguments needed in this block if this is the entry block.
     bool SuppressFastISel = false;
@@ -865,7 +724,7 @@
       // fast-isel in the entry block.
       if (FastIS) {
         unsigned j = 1;
-        for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+        for (Function::const_arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
              I != E; ++I, ++j)
           if (Fn.paramHasAttr(j, Attribute::ByVal)) {
             if (EnableFastISelVerbose || EnableFastISelAbort)
@@ -876,85 +735,25 @@
       }
     }
 
-    if (MMI && BB->isLandingPad()) {
-      // Add a label to mark the beginning of the landing pad.  Deletion of the
-      // landing pad can thus be detected via the MachineModuleInfo.
-      unsigned LabelID = MMI->addLandingPad(BB);
-
-      const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL);
-      BuildMI(BB, SDB->getCurDebugLoc(), II).addImm(LabelID);
-
-      // Mark exception register as live in.
-      unsigned Reg = TLI.getExceptionAddressRegister();
-      if (Reg) BB->addLiveIn(Reg);
-
-      // Mark exception selector register as live in.
-      Reg = TLI.getExceptionSelectorRegister();
-      if (Reg) BB->addLiveIn(Reg);
-
-      // FIXME: Hack around an exception handling flaw (PR1508): the personality
-      // function and list of typeids logically belong to the invoke (or, if you
-      // like, the basic block containing the invoke), and need to be associated
-      // with it in the dwarf exception handling tables.  Currently however the
-      // information is provided by an intrinsic (eh.selector) that can be moved
-      // to unexpected places by the optimizers: if the unwind edge is critical,
-      // then breaking it can result in the intrinsics being in the successor of
-      // the landing pad, not the landing pad itself.  This results
-      // in exceptions not being caught because no typeids are associated with
-      // the invoke.  This may not be the only way things can go wrong, but it
-      // is the only way we try to work around for the moment.
-      BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
-
-      if (Br && Br->isUnconditional()) { // Critical edge?
-        BasicBlock::iterator I, E;
-        for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
-          if (isa<EHSelectorInst>(I))
-            break;
-
-        if (I == E)
-          // No catch info found - try to extract some from the successor.
-          CopyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo);
-      }
-    }
-
+    // Setup an EH landing-pad block.
+    if (BB->isLandingPad())
+      PrepareEHLandingPad(BB);
+    
     // Before doing SelectionDAG ISel, see if FastISel has been requested.
     if (FastIS && !SuppressFastISel) {
       // Emit code for any incoming arguments. This must happen before
       // beginning FastISel on the entry block.
       if (LLVMBB == &Fn.getEntryBlock()) {
         CurDAG->setRoot(SDB->getControlRoot());
-        CodeGenAndEmitDAG();
+        BB = CodeGenAndEmitDAG(BB);
         SDB->clear();
       }
       FastIS->startNewBlock(BB);
       // Do FastISel on as many instructions as possible.
       for (; BI != End; ++BI) {
-        // Just before the terminator instruction, insert instructions to
-        // feed PHI nodes in successor blocks.
-        if (isa<TerminatorInst>(BI))
-          if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
-            ++NumFastIselFailures;
-            ResetDebugLoc(SDB, FastIS);
-            if (EnableFastISelVerbose || EnableFastISelAbort) {
-              dbgs() << "FastISel miss: ";
-              BI->dump();
-            }
-            assert(!EnableFastISelAbort &&
-                   "FastISel didn't handle a PHI in a successor");
-            break;
-          }
-
-        SetDebugLoc(MDDbgKind, BI, SDB, FastIS, &MF);
-
         // Try to select the instruction with FastISel.
-        if (FastIS->SelectInstruction(BI)) {
-          ResetDebugLoc(SDB, FastIS);
+        if (FastIS->SelectInstruction(BI))
           continue;
-        }
-
-        // Clear out the debug location so that it doesn't carry over to
-        // unrelated instructions.
-        ResetDebugLoc(SDB, FastIS);
 
         // Then handle certain instructions as single-LLVM-Instruction blocks.
         if (isa<CallInst>(BI)) {
@@ -964,14 +763,14 @@
             BI->dump();
           }
 
-          if (!BI->getType()->isVoidTy()) {
+          if (!BI->getType()->isVoidTy() && !BI->use_empty()) {
             unsigned &R = FuncInfo->ValueMap[BI];
             if (!R)
               R = FuncInfo->CreateRegForValue(BI);
           }
 
           bool HadTailCall = false;
-          SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall);
+          BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall);
 
           // If the call was emitted as a tail call, we're done with the block.
           if (HadTailCall) {
@@ -1007,44 +806,44 @@
     // block.
     if (BI != End) {
       bool HadTailCall;
-      SelectBasicBlock(LLVMBB, BI, End, HadTailCall);
+      BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall);
     }
 
-    FinishBasicBlock();
+    FinishBasicBlock(BB);
+    FuncInfo->PHINodesToUpdate.clear();
   }
 
   delete FastIS;
 }
 
 void
-SelectionDAGISel::FinishBasicBlock() {
+SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
 
   DEBUG(dbgs() << "Target-post-processed machine code:\n");
   DEBUG(BB->dump());
 
   DEBUG(dbgs() << "Total amount of phi nodes to update: "
-               << SDB->PHINodesToUpdate.size() << "\n");
-  DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i)
+               << FuncInfo->PHINodesToUpdate.size() << "\n");
+  DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
           dbgs() << "Node " << i << " : ("
-                 << SDB->PHINodesToUpdate[i].first
-                 << ", " << SDB->PHINodesToUpdate[i].second << ")\n");
+                 << FuncInfo->PHINodesToUpdate[i].first
+                 << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
 
   // Next, now that we know what the last MBB the LLVM BB expanded is, update
   // PHI nodes in successors.
   if (SDB->SwitchCases.empty() &&
       SDB->JTCases.empty() &&
       SDB->BitTestCases.empty()) {
-    for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
-      MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
+    for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+      MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
       assert(PHI->isPHI() &&
              "This is not a machine PHI node that we are updating!");
       if (!BB->isSuccessor(PHI->getParent()))
         continue;
-      PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
-                                                false));
+      PHI->addOperand(
+        MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
       PHI->addOperand(MachineOperand::CreateMBB(BB));
     }
-    SDB->PHINodesToUpdate.clear();
     return;
   }
 
@@ -1053,37 +852,38 @@
     if (!SDB->BitTestCases[i].Emitted) {
       // Set the current basic block to the mbb we wish to insert the code into
       BB = SDB->BitTestCases[i].Parent;
-      SDB->setCurrentBasicBlock(BB);
       // Emit the code
-      SDB->visitBitTestHeader(SDB->BitTestCases[i]);
+      SDB->visitBitTestHeader(SDB->BitTestCases[i], BB);
       CurDAG->setRoot(SDB->getRoot());
-      CodeGenAndEmitDAG();
+      BB = CodeGenAndEmitDAG(BB);
       SDB->clear();
     }
 
     for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
       // Set the current basic block to the mbb we wish to insert the code into
       BB = SDB->BitTestCases[i].Cases[j].ThisBB;
-      SDB->setCurrentBasicBlock(BB);
       // Emit the code
       if (j+1 != ej)
         SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
                               SDB->BitTestCases[i].Reg,
-                              SDB->BitTestCases[i].Cases[j]);
+                              SDB->BitTestCases[i].Cases[j],
+                              BB);
       else
         SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
                               SDB->BitTestCases[i].Reg,
-                              SDB->BitTestCases[i].Cases[j]);
+                              SDB->BitTestCases[i].Cases[j],
+                              BB);
 
 
       CurDAG->setRoot(SDB->getRoot());
-      CodeGenAndEmitDAG();
+      BB = CodeGenAndEmitDAG(BB);
       SDB->clear();
     }
 
     // Update PHI Nodes
-    for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
-      MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
+    for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+         pi != pe; ++pi) {
+      MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
       MachineBasicBlock *PHIBB = PHI->getParent();
       assert(PHI->isPHI() &&
              "This is not a machine PHI node that we are updating!");
@@ -1091,10 +891,12 @@
       // from last "case" BB.
       if (PHIBB == SDB->BitTestCases[i].Default) {
         PHI->addOperand(MachineOperand::
-                        CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+                        CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                  false));
         PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
         PHI->addOperand(MachineOperand::
-                        CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+                        CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                  false));
         PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
                                                   back().ThisBB));
       }
@@ -1104,7 +906,8 @@
         MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
         if (cBB->isSuccessor(PHIBB)) {
           PHI->addOperand(MachineOperand::
-                          CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+                          CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                    false));
           PHI->addOperand(MachineOperand::CreateMBB(cBB));
         }
       }
@@ -1120,40 +923,42 @@
     if (!SDB->JTCases[i].first.Emitted) {
       // Set the current basic block to the mbb we wish to insert the code into
       BB = SDB->JTCases[i].first.HeaderBB;
-      SDB->setCurrentBasicBlock(BB);
       // Emit the code
-      SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first);
+      SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
+                                BB);
       CurDAG->setRoot(SDB->getRoot());
-      CodeGenAndEmitDAG();
+      BB = CodeGenAndEmitDAG(BB);
       SDB->clear();
     }
 
     // Set the current basic block to the mbb we wish to insert the code into
     BB = SDB->JTCases[i].second.MBB;
-    SDB->setCurrentBasicBlock(BB);
     // Emit the code
     SDB->visitJumpTable(SDB->JTCases[i].second);
     CurDAG->setRoot(SDB->getRoot());
-    CodeGenAndEmitDAG();
+    BB = CodeGenAndEmitDAG(BB);
     SDB->clear();
 
     // Update PHI Nodes
-    for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
-      MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
+    for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+         pi != pe; ++pi) {
+      MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
       MachineBasicBlock *PHIBB = PHI->getParent();
       assert(PHI->isPHI() &&
              "This is not a machine PHI node that we are updating!");
       // "default" BB. We can go there only from header BB.
       if (PHIBB == SDB->JTCases[i].second.Default) {
         PHI->addOperand
-          (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+          (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                     false));
         PHI->addOperand
           (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
       }
       // JT BB. Just iterate over successors here
       if (BB->isSuccessor(PHIBB)) {
         PHI->addOperand
-          (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+          (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                     false));
         PHI->addOperand(MachineOperand::CreateMBB(BB));
       }
     }
@@ -1162,13 +967,13 @@
 
   // If the switch block involved a branch to one of the actual successors, we
   // need to update PHI nodes in that block.
-  for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
-    MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
+  for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+    MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
     assert(PHI->isPHI() &&
            "This is not a machine PHI node that we are updating!");
     if (BB->isSuccessor(PHI->getParent())) {
-      PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
-                                                false));
+      PHI->addOperand(
+        MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
       PHI->addOperand(MachineOperand::CreateMBB(BB));
     }
   }
@@ -1178,12 +983,11 @@
   for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
     // Set the current basic block to the mbb we wish to insert the code into
     MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB;
-    SDB->setCurrentBasicBlock(BB);
 
     // Emit the code
-    SDB->visitSwitchCase(SDB->SwitchCases[i]);
+    SDB->visitSwitchCase(SDB->SwitchCases[i], BB);
     CurDAG->setRoot(SDB->getRoot());
-    CodeGenAndEmitDAG();
+    BB = CodeGenAndEmitDAG(BB);
 
     // Handle any PHI nodes in successors of this chunk, as if we were coming
     // from the original BB before switch expansion.  Note that PHI nodes can
@@ -1205,11 +1009,11 @@
              ++Phi) {
           // This value for this PHI node is recorded in PHINodesToUpdate.
           for (unsigned pn = 0; ; ++pn) {
-            assert(pn != SDB->PHINodesToUpdate.size() &&
+            assert(pn != FuncInfo->PHINodesToUpdate.size() &&
                    "Didn't find PHI entry!");
-            if (SDB->PHINodesToUpdate[pn].first == Phi) {
+            if (FuncInfo->PHINodesToUpdate[pn].first == Phi) {
               Phi->addOperand(MachineOperand::
-                              CreateReg(SDB->PHINodesToUpdate[pn].second,
+                              CreateReg(FuncInfo->PHINodesToUpdate[pn].second,
                                         false));
               Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
               break;
@@ -1230,8 +1034,6 @@
     SDB->clear();
   }
   SDB->SwitchCases.clear();
-
-  SDB->PHINodesToUpdate.clear();
 }
 
 
@@ -1330,16 +1132,17 @@
   std::vector<SDValue> InOps;
   std::swap(InOps, Ops);
 
-  Ops.push_back(InOps[0]);  // input chain.
-  Ops.push_back(InOps[1]);  // input asm string.
+  Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+  Ops.push_back(InOps[InlineAsm::Op_AsmString]);  // 1
+  Ops.push_back(InOps[InlineAsm::Op_MDNode]);     // 2, !srcloc
 
-  unsigned i = 2, e = InOps.size();
+  unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
   if (InOps[e-1].getValueType() == MVT::Flag)
     --e;  // Don't process a flag operand if it is here.
 
   while (i != e) {
     unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
-    if ((Flags & 7) != 4 /*MEM*/) {
+    if (!InlineAsm::isMemKind(Flags)) {
       // Just skip over this operand, copying the operands verbatim.
       Ops.insert(Ops.end(), InOps.begin()+i,
                  InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
@@ -1349,14 +1152,14 @@
              "Memory operand with multiple values?");
       // Otherwise, this is a memory operand.  Ask the target to select it.
       std::vector<SDValue> SelOps;
-      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) {
-        llvm_report_error("Could not match memory address.  Inline asm"
-                          " failure!");
-      }
+      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+        report_fatal_error("Could not match memory address.  Inline asm"
+                           " failure!");
 
       // Add this to the output node.
-      Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3),
-                                              MVT::i32));
+      unsigned NewFlags =
+        InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+      Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
       Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
       i += 2;
     }
@@ -1433,7 +1236,8 @@
 /// IsLegalToFold - Returns true if the specific operand node N of
 /// U can be folded during instruction selection that starts at Root.
 bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
-                                     bool IgnoreChains) const {
+                                     CodeGenOpt::Level OptLevel,
+                                     bool IgnoreChains) {
   if (OptLevel == CodeGenOpt::None) return false;
 
   // If Root use can somehow reach N through a path that that doesn't contain
@@ -1517,14 +1321,6 @@
   return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
 }
 
-SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) {
-  SDValue Chain = N->getOperand(0);
-  unsigned C = cast<LabelSDNode>(N)->getLabelID();
-  SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
-  return CurDAG->SelectNodeTo(N, TargetOpcode::EH_LABEL,
-                              MVT::Other, Tmp, Chain);
-}
-
 /// GetVBR - decode a vbr encoding whose top bit is set.
 ALWAYS_INLINE static uint64_t
 GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -1580,8 +1376,9 @@
       assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
       CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
       
-      // If the node became dead, delete it.
-      if (ChainNode->use_empty())
+      // If the node became dead and we haven't already seen it, delete it.
+      if (ChainNode->use_empty() &&
+          !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
         NowDeadNodes.push_back(ChainNode);
     }
   }
@@ -1602,8 +1399,9 @@
       CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
                                         InputFlag, &ISU);
       
-      // If the node became dead, delete it.
-      if (FRN->use_empty())
+      // If the node became dead and we haven't already seen it, delete it.
+      if (FRN->use_empty() &&
+          !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
         NowDeadNodes.push_back(FRN);
     }
   }
@@ -1651,7 +1449,8 @@
     
     if (User->getOpcode() == ISD::CopyToReg ||
         User->getOpcode() == ISD::CopyFromReg ||
-        User->getOpcode() == ISD::INLINEASM) {
+        User->getOpcode() == ISD::INLINEASM ||
+        User->getOpcode() == ISD::EH_LABEL) {
       // If their node ID got reset to -1 then they've already been selected.
       // Treat them like a MachineOpcode.
       if (User->getNodeId() == -1)
@@ -1797,9 +1596,9 @@
   // It is possible we're using MorphNodeTo to replace a node with no
   // normal results with one that has a normal result (or we could be
   // adding a chain) and the input could have flags and chains as well.
-  // In this case we need to shifting the operands down.
+  // In this case we need to shift the operands down.
   // FIXME: This is a horrible hack and broken in obscure cases, no worse
-  // than the old isel though.  We should sink this into MorphNodeTo.
+  // than the old isel though.
   int OldFlagResultNo = -1, OldChainResultNo = -1;
 
   unsigned NTMNumResults = Node->getNumValues();
@@ -1875,7 +1674,9 @@
 ALWAYS_INLINE static bool
 CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
             SDNode *N) {
-  return N->getOpcode() == MatcherTable[MatcherIndex++];
+  uint16_t Opc = MatcherTable[MatcherIndex++];
+  Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+  return N->getOpcode() == Opc;
 }
 
 ALWAYS_INLINE static bool
@@ -2011,6 +1812,7 @@
   }
 }
 
+namespace {
 
 struct MatchScope {
   /// FailIndex - If this match fails, this is the index to continue with.
@@ -2032,6 +1834,8 @@
   bool HasChainNodesMatched, HasFlagResultNodesMatched;
 };
 
+}
+
 SDNode *SelectionDAGISel::
 SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
                  unsigned TableSize) {
@@ -2042,7 +1846,10 @@
   case ISD::EntryToken:       // These nodes remain the same.
   case ISD::BasicBlock:
   case ISD::Register:
+  //case ISD::VALUETYPE:
+  //case ISD::CONDCODE:
   case ISD::HANDLENODE:
+  case ISD::MDNODE_SDNODE:
   case ISD::TargetConstant:
   case ISD::TargetConstantFP:
   case ISD::TargetConstantPool:
@@ -2055,6 +1862,7 @@
   case ISD::TokenFactor:
   case ISD::CopyFromReg:
   case ISD::CopyToReg:
+  case ISD::EH_LABEL:
     NodeToMatch->setNodeId(-1); // Mark selected.
     return 0;
   case ISD::AssertSext:
@@ -2063,7 +1871,6 @@
                                       NodeToMatch->getOperand(0));
     return 0;
   case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
-  case ISD::EH_LABEL:  return Select_EH_LABEL(NodeToMatch);
   case ISD::UNDEF:     return Select_UNDEF(NodeToMatch);
   }
   
@@ -2127,7 +1934,8 @@
       if (CaseSize == 0) break;
 
       // Get the opcode, add the index to the table.
-      unsigned Opc = MatcherTable[Idx++];
+      uint16_t Opc = MatcherTable[Idx++];
+      Opc |= (unsigned short)MatcherTable[Idx++] << 8;
       if (Opc >= OpcodeOffset.size())
         OpcodeOffset.resize((Opc+1)*2);
       OpcodeOffset[Opc] = Idx;
@@ -2141,6 +1949,9 @@
   
   while (1) {
     assert(MatcherIndex < TableSize && "Invalid index");
+#ifndef NDEBUG
+    unsigned CurrentOpcodeIndex = MatcherIndex;
+#endif
     BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
     switch (Opcode) {
     case OPC_Scope: {
@@ -2163,6 +1974,9 @@
         
         FailIndex = MatcherIndex+NumToSkip;
         
+        unsigned MatcherIndexOfPredicate = MatcherIndex;
+        (void)MatcherIndexOfPredicate; // silence warning.
+        
         // If we can't evaluate this predicate without pushing a scope (e.g. if
         // it is a 'MoveParent') or if the predicate succeeds on this node, we
         // push the scope and evaluate the full predicate chain.
@@ -2172,9 +1986,10 @@
         if (!Result)
           break;
         
-        DEBUG(errs() << "  Skipped scope entry at index " << MatcherIndex
-              << " continuing at " << FailIndex << "\n");
-
+        DEBUG(errs() << "  Skipped scope entry (due to false predicate) at "
+                     << "index " << MatcherIndexOfPredicate
+                     << ", continuing at " << FailIndex << "\n");
+        ++NumDAGIselRetries;
         
         // Otherwise, we know that this case of the Scope is guaranteed to fail,
         // move to the next case.
@@ -2280,8 +2095,11 @@
           CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
         if (CaseSize == 0) break;
 
+        uint16_t Opc = MatcherTable[MatcherIndex++];
+        Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
         // If the opcode matches, then we will execute this case.
-        if (CurNodeOpcode == MatcherTable[MatcherIndex++])
+        if (CurNodeOpcode == Opc)
           break;
       
         // Otherwise, skip over this case.
@@ -2370,7 +2188,8 @@
       if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
                               NodeToMatch) ||
           !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
-                         NodeToMatch, true/*We validate our own chains*/))
+                         NodeToMatch, OptLevel,
+                         true/*We validate our own chains*/))
         break;
       
       continue;
@@ -2410,6 +2229,35 @@
       continue;
     }
         
+    case OPC_EmitMergeInputChains1_0:    // OPC_EmitMergeInputChains, 1, 0
+    case OPC_EmitMergeInputChains1_1: {  // OPC_EmitMergeInputChains, 1, 1
+      // These are space-optimized forms of OPC_EmitMergeInputChains.
+      assert(InputChain.getNode() == 0 &&
+             "EmitMergeInputChains should be the first chain producing node");
+      assert(ChainNodesMatched.empty() &&
+             "Should only have one EmitMergeInputChains per match");
+      
+      // Read all of the chained nodes.
+      unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
+      assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+      ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+        
+      // FIXME: What if other value results of the node have uses not matched
+      // by this pattern?
+      if (ChainNodesMatched.back() != NodeToMatch &&
+          !RecordedNodes[RecNo].hasOneUse()) {
+        ChainNodesMatched.clear();
+        break;
+      }
+      
+      // Merge the input chains if they are not intra-pattern references.
+      InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+      
+      if (InputChain.getNode() == 0)
+        break;  // Failed to merge.
+      continue;
+    }
+        
     case OPC_EmitMergeInputChains: {
       assert(InputChain.getNode() == 0 &&
              "EmitMergeInputChains should be the first chain producing node");
@@ -2628,14 +2476,10 @@
         assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
         SDValue Res = RecordedNodes[ResSlot];
         
-        // FIXME2: Eliminate this horrible hack by fixing the 'Gen' program
-        // after (parallel) on input patterns are removed.  This would also
-        // allow us to stop encoding #results in OPC_CompleteMatch's table
-        // entry.
-        if (NodeToMatch->getNumValues() <= i ||
-            NodeToMatch->getValueType(i) == MVT::Other ||
-            NodeToMatch->getValueType(i) == MVT::Flag)
-          break;
+        assert(i < NodeToMatch->getNumValues() &&
+               NodeToMatch->getValueType(i) != MVT::Other &&
+               NodeToMatch->getValueType(i) != MVT::Flag &&
+               "Invalid number of results to complete!");
         assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
                 NodeToMatch->getValueType(i) == MVT::iPTR ||
                 Res.getValueType() == MVT::iPTR ||
@@ -2666,6 +2510,8 @@
     // If the code reached this point, then the match failed.  See if there is
     // another child to try in the current 'Scope', otherwise pop it until we
     // find a case to check.
+    DEBUG(errs() << "  Match failed at index " << CurrentOpcodeIndex << "\n");
+    ++NumDAGIselRetries;
     while (1) {
       if (MatchScopes.empty()) {
         CannotYetSelect(NodeToMatch);
@@ -2680,13 +2526,12 @@
       NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
       N = NodeStack.back();
 
-      DEBUG(errs() << "  Match failed at index " << MatcherIndex
-                   << " continuing at " << LastScope.FailIndex << "\n");
-    
       if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
         MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
       MatcherIndex = LastScope.FailIndex;
       
+      DEBUG(errs() << "  Continuing at " << MatcherIndex << "\n");
+    
       InputChain = LastScope.InputChain;
       InputFlag = LastScope.InputFlag;
       if (!LastScope.HasChainNodesMatched)
@@ -2737,7 +2582,7 @@
     else
       Msg << "unknown intrinsic #" << iid;
   }
-  llvm_report_error(Msg.str());
+  report_fatal_error(Msg.str());
 }
 
 char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8d0d884..f59f851 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -175,17 +175,25 @@
   Names[RTLIB::FLOOR_F64] = "floor";
   Names[RTLIB::FLOOR_F80] = "floorl";
   Names[RTLIB::FLOOR_PPCF128] = "floorl";
+  Names[RTLIB::COPYSIGN_F32] = "copysignf";
+  Names[RTLIB::COPYSIGN_F64] = "copysign";
+  Names[RTLIB::COPYSIGN_F80] = "copysignl";
+  Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
   Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+  Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+  Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
   Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
   Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
   Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
   Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
   Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
-  Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8";
-  Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16";
+  Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
+  Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
   Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
   Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
   Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+  Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
+  Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
   Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
   Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
   Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
@@ -195,11 +203,13 @@
   Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
   Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
   Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
-  Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8";
-  Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16";
+  Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
+  Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
   Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
   Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
   Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+  Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
+  Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
   Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
   Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
   Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
@@ -270,6 +280,7 @@
     if (RetVT == MVT::f64)
       return FPEXT_F32_F64;
   }
+
   return UNKNOWN_LIBCALL;
 }
 
@@ -289,6 +300,7 @@
     if (OpVT == MVT::ppcf128)
       return FPROUND_PPCF128_F64;
   }
+
   return UNKNOWN_LIBCALL;
 }
 
@@ -307,6 +319,10 @@
     if (RetVT == MVT::i128)
       return FPTOSINT_F32_I128;
   } else if (OpVT == MVT::f64) {
+    if (RetVT == MVT::i8)
+      return FPTOSINT_F64_I8;
+    if (RetVT == MVT::i16)
+      return FPTOSINT_F64_I16;
     if (RetVT == MVT::i32)
       return FPTOSINT_F64_I32;
     if (RetVT == MVT::i64)
@@ -346,6 +362,10 @@
     if (RetVT == MVT::i128)
       return FPTOUINT_F32_I128;
   } else if (OpVT == MVT::f64) {
+    if (RetVT == MVT::i8)
+      return FPTOUINT_F64_I8;
+    if (RetVT == MVT::i16)
+      return FPTOUINT_F64_I16;
     if (RetVT == MVT::i32)
       return FPTOUINT_F64_I32;
     if (RetVT == MVT::i64)
@@ -461,14 +481,14 @@
 }
 
 /// NOTE: The constructor takes ownership of TLOF.
-TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
+TargetLowering::TargetLowering(const TargetMachine &tm,
+                               const TargetLoweringObjectFile *tlof)
   : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
   // All operations default to being supported.
   memset(OpActions, 0, sizeof(OpActions));
   memset(LoadExtActions, 0, sizeof(LoadExtActions));
   memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
   memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
-  memset(ConvertActions, 0, sizeof(ConvertActions));
   memset(CondCodeActions, 0, sizeof(CondCodeActions));
 
   // Set default actions for various operations.
@@ -702,7 +722,7 @@
       unsigned NElts = VT.getVectorNumElements();
       for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
         EVT SVT = (MVT::SimpleValueType)nVT;
-        if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
+        if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
             SVT.getVectorNumElements() > NElts && NElts != 1) {
           TransformToType[i] = SVT;
           ValueTypeActions.setTypeAction(VT, Promote);
@@ -793,20 +813,6 @@
   return 1;
 }
 
-/// getWidenVectorType: given a vector type, returns the type to widen to
-/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
-/// If there is no vector type that we want to widen to, returns MVT::Other
-/// When and where to widen is target dependent based on the cost of
-/// scalarizing vs using the wider vector type.
-EVT TargetLowering::getWidenVectorType(EVT VT) const {
-  assert(VT.isVector());
-  if (isTypeLegal(VT))
-    return VT;
- 
-  // Default is not to widen until moved to LegalizeTypes
-  return MVT::Other;
-}
-
 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
 /// function arguments in the caller parameter area.  This is the actual
 /// alignment, not its logarithm.
@@ -1275,8 +1281,9 @@
     // variable.  The low bit of the shift cannot be an input sign bit unless
     // the shift amount is >= the size of the datatype, which is undefined.
     if (DemandedMask == 1)
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
-                                               Op.getOperand(0), Op.getOperand(1)));
+      return TLO.CombineTo(Op,
+                           TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+                                           Op.getOperand(0), Op.getOperand(1)));
 
     if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       EVT VT = Op.getValueType();
@@ -1461,23 +1468,29 @@
       case ISD::SRL:
         // Shrink SRL by a constant if none of the high bits shifted in are
         // demanded.
-        if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
-          APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
-                                                 OperandBitWidth - BitWidth);
-          HighBits = HighBits.lshr(ShAmt->getZExtValue());
-          HighBits.trunc(BitWidth);
-          
-          if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
-            // None of the shifted in bits are needed.  Add a truncate of the
-            // shift input, then shift it.
-            SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
-                                                 Op.getValueType(), 
-                                                 In.getOperand(0));
-            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
-                                                     Op.getValueType(),
-                                                     NewTrunc, 
-                                                     In.getOperand(1)));
-          }
+        if (TLO.LegalTypes() &&
+            !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+          // undesirable.
+          break;
+        ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+        if (!ShAmt)
+          break;
+        APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+                                               OperandBitWidth - BitWidth);
+        HighBits = HighBits.lshr(ShAmt->getZExtValue());
+        HighBits.trunc(BitWidth);
+
+        if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+          // None of the shifted in bits are needed.  Add a truncate of the
+          // shift input, then shift it.
+          SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+                                             Op.getValueType(), 
+                                             In.getOperand(0));
+          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+                                                   Op.getValueType(),
+                                                   NewTrunc, 
+                                                   In.getOperand(1)));
         }
         break;
       }
@@ -2241,7 +2254,7 @@
 
 /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
 /// node is a GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
                                     int64_t &Offset) const {
   if (isa<GlobalAddressSDNode>(N)) {
     GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 0000000..d20477f
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,21 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::TargetSelectionDAGInfo() {
+}
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 0e6d479..5240bef 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -160,7 +160,7 @@
           Args.clear();
           Args.append(CI->op_begin() + 1, CI->op_end());
 
-          InvokeInst *II = InvokeInst::Create(CI->getOperand(0),
+          InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
                                               NewBB, CleanupBB,
                                               Args.begin(), Args.end(),
                                               CI->getName(), CallBB);
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index ce72b2f..543278b 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
@@ -58,11 +59,6 @@
                cl::desc("Avoid coalescing cross register class copies"),
                cl::init(false), cl::Hidden);
 
-static cl::opt<bool>
-PhysJoinTweak("tweak-phys-join-heuristics",
-               cl::desc("Tweak heuristics for joining phys reg with vr"),
-               cl::init(false), cl::Hidden);
-
 static RegisterPass<SimpleRegisterCoalescing>
 X("simple-register-coalescing", "Simple Register Coalescing");
 
@@ -808,7 +804,8 @@
           CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
         // If the use is a copy and it won't be coalesced away, and its source
         // is defined by a trivial computation, try to rematerialize it instead.
-        if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,
+        if (!JoinedCopies.count(UseMI) &&
+            ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,
                                     CopyDstSubIdx, UseMI))
           continue;
       }
@@ -1172,20 +1169,44 @@
 /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
 /// two virtual registers from different register classes.
 bool
-SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
-                                                unsigned SmallReg,
-                                                unsigned Threshold) {
-  // Then make sure the intervals are *short*.
-  LiveInterval &LargeInt = li_->getInterval(LargeReg);
-  LiveInterval &SmallInt = li_->getInterval(SmallReg);
-  unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
-  unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
-  if (LargeSize > Threshold) {
-    unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg),
-                                       mri_->use_nodbg_end());
-    unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg),
-                                       mri_->use_nodbg_end());
-    if (SmallUses*LargeSize < LargeUses*SmallSize)
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
+                                                unsigned DstReg,
+                                             const TargetRegisterClass *SrcRC,
+                                             const TargetRegisterClass *DstRC,
+                                             const TargetRegisterClass *NewRC) {
+  unsigned NewRCCount = allocatableRCRegs_[NewRC].count();
+  // This heuristics is good enough in practice, but it's obviously not *right*.
+  // 4 is a magic number that works well enough for x86, ARM, etc. It filter
+  // out all but the most restrictive register classes.
+  if (NewRCCount > 4 ||
+      // Early exit if the function is fairly small, coalesce aggressively if
+      // that's the case. For really special register classes with 3 or
+      // fewer registers, be a bit more careful.
+      (li_->getFuncInstructionCount() / NewRCCount) < 8)
+    return true;
+  LiveInterval &SrcInt = li_->getInterval(SrcReg);
+  LiveInterval &DstInt = li_->getInterval(DstReg);
+  unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
+  unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+  if (SrcSize <= NewRCCount && DstSize <= NewRCCount)
+    return true;
+  // Estimate *register use density*. If it doubles or more, abort.
+  unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
+                                   mri_->use_nodbg_end());
+  unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
+                                   mri_->use_nodbg_end());
+  float NewDensity = ((float)(SrcUses + DstUses) / (SrcSize + DstSize)) /
+    NewRCCount;
+  if (SrcRC != NewRC && SrcSize > NewRCCount) {
+    unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count();
+    float Density = ((float)SrcUses / SrcSize) / SrcRCCount;
+    if (NewDensity > Density * 2.0f)
+      return false;
+  }
+  if (DstRC != NewRC && DstSize > NewRCCount) {
+    unsigned DstRCCount = allocatableRCRegs_[DstRC].count();
+    float Density = ((float)DstUses / DstSize) / DstRCCount;
+    if (NewDensity > Density * 2.0f)
       return false;
   }
   return true;
@@ -1260,10 +1281,10 @@
   RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
   assert(RealDstReg && "Invalid extract_subreg instruction!");
 
+  LiveInterval &RHS = li_->getInterval(SrcReg);
   // For this type of EXTRACT_SUBREG, conservatively
   // check if the live interval of the source register interfere with the
   // actual super physical register we are trying to coalesce with.
-  LiveInterval &RHS = li_->getInterval(SrcReg);
   if (li_->hasInterval(RealDstReg) &&
       RHS.overlaps(li_->getInterval(RealDstReg))) {
     DEBUG({
@@ -1273,7 +1294,11 @@
     return false; // Not coalescable
   }
   for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
-    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+    // Do not check DstReg or its sub-register. JoinIntervals() will take care
+    // of that.
+    if (*SR != DstReg &&
+        !tri_->isSubRegister(DstReg, *SR) &&
+        li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
       DEBUG({
           dbgs() << "Interfere with sub-register ";
           li_->getInterval(*SR).print(dbgs(), tri_);
@@ -1294,9 +1319,9 @@
   RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
   assert(RealSrcReg && "Invalid extract_subreg instruction!");
 
-  LiveInterval &RHS = li_->getInterval(DstReg);
+  LiveInterval &LHS = li_->getInterval(DstReg);
   if (li_->hasInterval(RealSrcReg) &&
-      RHS.overlaps(li_->getInterval(RealSrcReg))) {
+      LHS.overlaps(li_->getInterval(RealSrcReg))) {
     DEBUG({
         dbgs() << "Interfere with register ";
         li_->getInterval(RealSrcReg).print(dbgs(), tri_);
@@ -1304,7 +1329,11 @@
     return false; // Not coalescable
   }
   for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
-    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+    // Do not check SrcReg or its sub-register. JoinIntervals() will take care
+    // of that.
+    if (*SR != SrcReg &&
+        !tri_->isSubRegister(SrcReg, *SR) &&
+        li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
       DEBUG({
           dbgs() << "Interfere with sub-register ";
           li_->getInterval(*SR).print(dbgs(), tri_);
@@ -1436,7 +1465,6 @@
   const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
   const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
   const TargetRegisterClass *NewRC = NULL;
-  MachineBasicBlock *CopyMBB = CopyMI->getParent();
   unsigned RealDstReg = 0;
   unsigned RealSrcReg = 0;
   if (isExtSubReg || isInsSubReg || isSubRegToReg) {
@@ -1476,6 +1504,9 @@
         return false; // Not coalescable.
       }
 
+      // FIXME: The following checks are somewhat conservative. Perhaps a better
+      // way to implement this is to treat this as coalescing a vr with the
+      // super physical register.
       if (isExtSubReg) {
         if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
           return false; // Not coalescable
@@ -1511,10 +1542,11 @@
           return false;  // Not coalescable
         }
 
-        unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
-        unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
-        unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
-        if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
+        if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+          DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: "
+                       << SrcRC->getName() << "/"
+                       << DstRC->getName() << " -> "
+                       << NewRC->getName() << ".\n");
           Again = true;  // May be possible to coalesce later.
           return false;
         }
@@ -1562,49 +1594,40 @@
       }
     }
 
-    unsigned LargeReg = SrcReg;
-    unsigned SmallReg = DstReg;
-
     // Now determine the register class of the joined register.
-    if (isExtSubReg) {
-      if (SubIdx && DstRC && DstRC->isASubClass()) {
-        // This is a move to a sub-register class. However, the source is a
-        // sub-register of a larger register class. We don't know what should
-        // the register class be. FIXME.
-        Again = true;
-        return false;
+    if (!SrcIsPhys && !DstIsPhys) {
+      if (isExtSubReg) {
+        NewRC =
+          SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC;
+      } else if (isInsSubReg) {
+        NewRC =
+          SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC;
+      } else {
+        NewRC = getCommonSubClass(SrcRC, DstRC);
       }
-      if (!DstIsPhys && !SrcIsPhys)
-        NewRC = SrcRC;
-    } else if (!SrcIsPhys && !DstIsPhys) {
-      NewRC = getCommonSubClass(SrcRC, DstRC);
+
       if (!NewRC) {
         DEBUG(dbgs() << "\tDisjoint regclasses: "
                      << SrcRC->getName() << ", "
                      << DstRC->getName() << ".\n");
         return false;           // Not coalescable.
       }
-      if (DstRC->getSize() > SrcRC->getSize())
-        std::swap(LargeReg, SmallReg);
-    }
 
-    // If we are joining two virtual registers and the resulting register
-    // class is more restrictive (fewer register, smaller size). Check if it's
-    // worth doing the merge.
-    if (!SrcIsPhys && !DstIsPhys &&
-        (isExtSubReg || DstRC->isASubClass()) &&
-        !isWinToJoinCrossClass(LargeReg, SmallReg,
-                               allocatableRCRegs_[NewRC].count())) {
-      DEBUG(dbgs() << "\tSrc/Dest are different register classes: "
-                   << SrcRC->getName() << "/"
-                   << DstRC->getName() << " -> "
-                   << NewRC->getName() << ".\n");
-      // Allow the coalescer to try again in case either side gets coalesced to
-      // a physical register that's compatible with the other side. e.g.
-      // r1024 = MOV32to32_ r1025
-      // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
-      Again = true;  // May be possible to coalesce later.
-      return false;
+      // If we are joining two virtual registers and the resulting register
+      // class is more restrictive (fewer register, smaller size). Check if it's
+      // worth doing the merge.
+      if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+        DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: "
+                     << SrcRC->getName() << "/"
+                     << DstRC->getName() << " -> "
+                     << NewRC->getName() << ".\n");
+        // Allow the coalescer to try again in case either side gets coalesced to
+        // a physical register that's compatible with the other side. e.g.
+        // r1024 = MOV32to32_ r1025
+        // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+        Again = true;  // May be possible to coalesce later.
+        return false;
+      }
     }
   }
 
@@ -1628,14 +1651,14 @@
   // Save a copy of the virtual register live interval. We'll manually
   // merge this into the "real" physical register live interval this is
   // coalesced with.
-  LiveInterval *SavedLI = 0;
+  OwningPtr<LiveInterval> SavedLI;
   if (RealDstReg)
-    SavedLI = li_->dupInterval(&SrcInt);
+    SavedLI.reset(li_->dupInterval(&SrcInt));
   else if (RealSrcReg)
-    SavedLI = li_->dupInterval(&DstInt);
+    SavedLI.reset(li_->dupInterval(&DstInt));
 
-  // Check if it is necessary to propagate "isDead" property.
   if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
+    // Check if it is necessary to propagate "isDead" property.
     MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
     bool isDead = mopd->isDead();
 
@@ -1644,48 +1667,42 @@
     // these are not spillable! If the destination interval uses are far away,
     // think twice about coalescing them!
     if (!isDead && (SrcIsPhys || DstIsPhys)) {
-      // If the copy is in a loop, take care not to coalesce aggressively if the
-      // src is coming in from outside the loop (or the dst is out of the loop).
-      // If it's not in a loop, then determine whether to join them base purely
-      // by the length of the interval.
-      if (PhysJoinTweak) {
-        if (SrcIsPhys) {
-          if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
-            mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg);
-            ++numAborts;
-            DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
-            Again = true;  // May be possible to coalesce later.
-            return false;
-          }
-        } else {
-          if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
-            mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg);
-            ++numAborts;
-            DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
-            Again = true;  // May be possible to coalesce later.
-            return false;
-          }
-        }
-      } else {
-        // If the virtual register live interval is long but it has low use
-        // density, do not join them, instead mark the physical register as its
-        // allocation preference.
-        LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
-        unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
-        unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
-        const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
-        unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
-        unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
-        float Ratio = 1.0 / Threshold;
-        if (Length > Threshold &&
-            (((float)std::distance(mri_->use_nodbg_begin(JoinVReg),
-                                   mri_->use_nodbg_end()) / Length) < Ratio)) {
-          mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
-          ++numAborts;
-          DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
-          Again = true;  // May be possible to coalesce later.
-          return false;
-        }
+      // If the virtual register live interval is long but it has low use
+      // density, do not join them, instead mark the physical register as its
+      // allocation preference.
+      LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+      LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt;
+      unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
+      unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
+
+      // Don't join with physregs that have a ridiculous number of live
+      // ranges. The data structure performance is really bad when that
+      // happens.
+      if (JoinPInt.ranges.size() > 1000) {
+        mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
+        ++numAborts;
+        DEBUG(dbgs()
+              << "\tPhysical register live interval too complicated, abort!\n");
+        return false;
+      }
+
+      const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
+      unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+      unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+      float Ratio = 1.0 / Threshold;
+      if (Length > Threshold &&
+          (((float)std::distance(mri_->use_nodbg_begin(JoinVReg),
+                                 mri_->use_nodbg_end()) / Length) < Ratio)) {
+        // Before giving up coalescing, if definition of source is defined by
+        // trivial computation, try rematerializing it.
+        if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI))
+          return true;
+
+        mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
+        ++numAborts;
+        DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+        Again = true;  // May be possible to coalesce later.
+        return false;
       }
     }
   }
@@ -1696,16 +1713,15 @@
   // been modified, so we can use this information below to update aliases.
   bool Swapped = false;
   // If SrcInt is implicitly defined, it's safe to coalesce.
-  bool isEmpty = SrcInt.empty();
-  if (isEmpty && !CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
-    // Only coalesce an empty interval (defined by implicit_def) with
-    // another interval which has a valno defined by the CopyMI and the CopyMI
-    // is a kill of the implicit def.
-    DEBUG(dbgs() << "Not profitable!\n");
-    return false;
-  }
-
-  if (!isEmpty && !JoinIntervals(DstInt, SrcInt, Swapped)) {
+  if (SrcInt.empty()) {
+    if (!CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
+      // Only coalesce an empty interval (defined by implicit_def) with
+      // another interval which has a valno defined by the CopyMI and the CopyMI
+      // is a kill of the implicit def.
+      DEBUG(dbgs() << "Not profitable!\n");
+      return false;
+    }
+  } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) {
     // Coalescing failed.
 
     // If definition of source is defined by trivial computation, try
@@ -1830,7 +1846,7 @@
   // Manually deleted the live interval copy.
   if (SavedLI) {
     SavedLI->clear();
-    delete SavedLI;
+    SavedLI.reset();
   }
 
   // If resulting interval has a preference that no longer fits because of subreg
@@ -2193,7 +2209,7 @@
         li_->intervalIsInOneMBB(RHS) &&
         li_->getApproximateInstructionCount(RHS) <= 10) {
       // Perform a more exhaustive check for some common cases.
-      if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
+      if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
         return false;
     } else {
       for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
@@ -2210,7 +2226,7 @@
     if (LHS.containsOneValue() &&
         li_->getApproximateInstructionCount(LHS) <= 10) {
       // Perform a more exhaustive check for some common cases.
-      if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
+      if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
         return false;
     } else {
       for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
@@ -2745,7 +2761,7 @@
             // delete them later.
             DoDelete = false;
         }
-        if (MI->registerDefIsDead(DstReg)) {
+        if (MI->allDefsAreDead()) {
           LiveInterval &li = li_->getInterval(DstReg);
           if (!ShortenDeadCopySrcLiveRange(li, MI))
             ShortenDeadCopyLiveRange(li, MI);
@@ -2776,7 +2792,7 @@
           if (MO.isDead())
             continue;
           if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
-              !mri_->use_empty(Reg)) {
+              !mri_->use_nodbg_empty(Reg)) {
             isDead = false;
             break;
           }
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index f668064..bd2d0bb 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -179,8 +179,11 @@
 
     /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
     /// two virtual registers from different register classes.
-    bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
-                               unsigned Threshold);
+    bool isWinToJoinCrossClass(unsigned SrcReg,
+                               unsigned DstReg,
+                               const TargetRegisterClass *SrcRC,
+                               const TargetRegisterClass *DstRC,
+                               const TargetRegisterClass *NewRC);
 
     /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
     /// register with a physical register, check if any of the virtual register
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 7ba4403..63c5554 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -46,7 +46,6 @@
 /// Utility class for spillers.
 class SpillerBase : public Spiller {
 protected:
-
   MachineFunction *mf;
   LiveIntervals *lis;
   MachineFrameInfo *mfi;
@@ -160,9 +159,11 @@
 
     return added;
   }
-
 };
 
+} // end anonymous namespace
+
+namespace {
 
 /// Spills any live range using the spill-everywhere method with no attempt at
 /// folding.
@@ -178,9 +179,12 @@
     // Ignore spillIs - we don't use it.
     return trivialSpillEverywhere(li);
   }
-
 };
 
+} // end anonymous namespace
+
+namespace {
+
 /// Falls back on LiveIntervals::addIntervalsForSpills.
 class StandardSpiller : public Spiller {
 protected:
@@ -198,9 +202,12 @@
                                    SlotIndex*) {
     return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
   }
-
 };
 
+} // end anonymous namespace
+
+namespace {
+
 /// When a call to spill is placed this spiller will first try to break the
 /// interval up into its component values (one new interval per value).
 /// If this fails, or if a call is placed to spill a previously split interval
@@ -513,15 +520,16 @@
 
 };
 
-}
+} // end anonymous namespace
+
 
 llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
                                    const MachineLoopInfo *loopInfo,
                                    VirtRegMap *vrm) {
   switch (spillerOpt) {
-    case trivial: return new TrivialSpiller(mf, lis, vrm); break;
-    case standard: return new StandardSpiller(lis, loopInfo, vrm); break;
-    case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break;
-    default: llvm_unreachable("Unreachable!"); break;
+  default: assert(0 && "unknown spiller");
+  case trivial: return new TrivialSpiller(mf, lis, vrm);
+  case standard: return new StandardSpiller(lis, loopInfo, vrm);
+  case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm);
   }
 }
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 3223e53..aa6e2b4 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -495,7 +495,7 @@
     if (InstrCount == MaxDuplicateCount) return false;
     // Remember if we saw a call.
     if (I->getDesc().isCall()) HasCall = true;
-    if (!I->isPHI())
+    if (!I->isPHI() && !I->isDebugValue())
       InstrCount += 1;
   }
   // Heuristically, don't tail-duplicate calls if it would expand code size,
@@ -648,17 +648,6 @@
   while (!MBB->succ_empty())
     MBB->removeSuccessor(MBB->succ_end()-1);
 
-  // If there are any labels in the basic block, unregister them from
-  // MachineModuleInfo.
-  if (MMI && !MBB->empty()) {
-    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-         I != E; ++I) {
-      if (I->isLabel())
-        // The label ID # is always operand #0, an immediate.
-        MMI->InvalidateLabel(I->getOperand(0).getImm());
-    }
-  }
-
   // Remove the block.
   MBB->eraseFromParent();
 }
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index e9e998f..0ad6619 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -40,7 +40,7 @@
     std::string msg;
     raw_string_ostream Msg(msg);
     Msg << "Don't know how to commute: " << *MI;
-    llvm_report_error(Msg.str());
+    report_fatal_error(Msg.str());
   }
 
   assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index d127f53..9f95993 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -38,108 +38,88 @@
 //===----------------------------------------------------------------------===//
 //                                  ELF
 //===----------------------------------------------------------------------===//
-typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
-
-TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() {
-  // If we have the section uniquing map, free it.
-  delete (ELFUniqueMapTy*)UniquingMap;
-}
-
-const MCSection *TargetLoweringObjectFileELF::
-getELFSection(StringRef Section, unsigned Type, unsigned Flags,
-              SectionKind Kind, bool IsExplicit) const {
-  if (UniquingMap == 0)
-    UniquingMap = new ELFUniqueMapTy();
-  ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap;
-
-  // Do the lookup, if we have a hit, return it.
-  const MCSectionELF *&Entry = Map[Section];
-  if (Entry) return Entry;
-
-  return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit,
-                                      getContext());
-}
 
 void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
                                              const TargetMachine &TM) {
-  if (UniquingMap != 0)
-    ((ELFUniqueMapTy*)UniquingMap)->clear();
   TargetLoweringObjectFile::Initialize(Ctx, TM);
 
   BSSSection =
-    getELFSection(".bss", MCSectionELF::SHT_NOBITS,
-                  MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
-                  SectionKind::getBSS());
+    getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS,
+                               MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+                               SectionKind::getBSS());
 
   TextSection =
-    getELFSection(".text", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC,
-                  SectionKind::getText());
+    getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_EXECINSTR |
+                               MCSectionELF::SHF_ALLOC,
+                               SectionKind::getText());
 
   DataSection =
-    getELFSection(".data", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
-                  SectionKind::getDataRel());
+    getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+                               SectionKind::getDataRel());
 
   ReadOnlySection =
-    getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC,
-                  SectionKind::getReadOnly());
+    getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC,
+                               SectionKind::getReadOnly());
 
   TLSDataSection =
-    getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
-                  MCSectionELF::SHF_WRITE, SectionKind::getThreadData());
+    getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
+                               MCSectionELF::SHF_WRITE,
+                               SectionKind::getThreadData());
 
   TLSBSSSection =
-    getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
-                  MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS());
+    getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
+                               MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
+                               MCSectionELF::SHF_WRITE,
+                               SectionKind::getThreadBSS());
 
   DataRelSection =
-    getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
-                  SectionKind::getDataRel());
+    getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+                               SectionKind::getDataRel());
 
   DataRelLocalSection =
-    getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
-                  SectionKind::getDataRelLocal());
+    getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+                               SectionKind::getDataRelLocal());
 
   DataRelROSection =
-    getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
-                  SectionKind::getReadOnlyWithRel());
+    getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+                               SectionKind::getReadOnlyWithRel());
 
   DataRelROLocalSection =
-    getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
-                  SectionKind::getReadOnlyWithRelLocal());
+    getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+                               SectionKind::getReadOnlyWithRelLocal());
 
   MergeableConst4Section =
-    getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
-                  SectionKind::getMergeableConst4());
+    getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+                               SectionKind::getMergeableConst4());
 
   MergeableConst8Section =
-    getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
-                  SectionKind::getMergeableConst8());
+    getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+                               SectionKind::getMergeableConst8());
 
   MergeableConst16Section =
-    getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
-                  SectionKind::getMergeableConst16());
+    getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+                               SectionKind::getMergeableConst16());
 
   StaticCtorSection =
-    getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
-                  SectionKind::getDataRel());
+    getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+                               SectionKind::getDataRel());
 
   StaticDtorSection =
-    getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
-                  SectionKind::getDataRel());
+    getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+                               SectionKind::getDataRel());
 
   // Exception Handling Sections.
 
@@ -148,47 +128,48 @@
   // runtime hit for C++ apps.  Either the contents of the LSDA need to be
   // adjusted or this should be a data section.
   LSDASection =
-    getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly());
+    getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC,
+                               SectionKind::getReadOnly());
   EHFrameSection =
-    getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
-                  MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
-                  SectionKind::getDataRel());
+    getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
+                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+                               SectionKind::getDataRel());
 
   // Debug Info Sections.
   DwarfAbbrevSection =
-    getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfInfoSection =
-    getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfLineSection =
-    getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfFrameSection =
-    getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfPubNamesSection =
-    getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfPubTypesSection =
-    getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfStrSection =
-    getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfLocSection =
-    getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfARangesSection =
-    getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfRangesSection =
-    getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
   DwarfMacroInfoSection =
-    getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
-                  SectionKind::getMetadata());
+    getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
 }
 
 
@@ -277,9 +258,9 @@
   // Infer section flags from the section name if we can.
   Kind = getELFKindForNamedSection(SectionName, Kind);
 
-  return getELFSection(SectionName,
-                       getELFSectionType(SectionName, Kind),
-                       getELFSectionFlags(Kind), Kind, true);
+  return getContext().getELFSection(SectionName,
+                                    getELFSectionType(SectionName, Kind),
+                                    getELFSectionFlags(Kind), Kind, true);
 }
 
 static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
@@ -298,19 +279,54 @@
   return ".gnu.linkonce.d.rel.ro.";
 }
 
+/// getSectionPrefixForGlobal - Return the section prefix name used by options
+/// FunctionsSections and DataSections.
+static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+  if (Kind.isText())                 return ".text.";
+  if (Kind.isReadOnly())             return ".rodata.";
+
+  if (Kind.isThreadData())           return ".tdata.";
+  if (Kind.isThreadBSS())            return ".tbss.";
+
+  if (Kind.isDataNoRel())            return ".data.";
+  if (Kind.isDataRelLocal())         return ".data.rel.local.";
+  if (Kind.isDataRel())              return ".data.rel.";
+  if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
+
+  assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+  return ".data.rel.ro.";
+}
+
+
 const MCSection *TargetLoweringObjectFileELF::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                        Mangler *Mang, const TargetMachine &TM) const {
+  // If we have -ffunction-section or -fdata-section then we should emit the
+  // global value to a uniqued section specifically for it.
+  bool EmitUniquedSection;
+  if (Kind.isText())
+    EmitUniquedSection = TM.getFunctionSections();
+  else 
+    EmitUniquedSection = TM.getDataSections();
 
   // If this global is linkonce/weak and the target handles this by emitting it
   // into a 'uniqued' section name, create and return the section now.
-  if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) {
-    const char *Prefix = getSectionPrefixForUniqueGlobal(Kind);
-    SmallString<128> Name;
-    Name.append(Prefix, Prefix+strlen(Prefix));
-    Mang->getNameWithPrefix(Name, GV, false);
-    return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind),
-                         getELFSectionFlags(Kind), Kind);
+  if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+      !Kind.isCommon() && !Kind.isBSS()) {
+    const char *Prefix;
+    if (GV->isWeakForLinker())
+      Prefix = getSectionPrefixForUniqueGlobal(Kind);
+    else {
+      assert(EmitUniquedSection);
+      Prefix = getSectionPrefixForGlobal(Kind);
+    }
+
+    SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    Name.append(Sym->getName().begin(), Sym->getName().end());
+    return getContext().getELFSection(Name.str(),
+                                      getELFSectionType(Name.str(), Kind),
+                                      getELFSectionFlags(Kind), Kind);
   }
 
   if (Kind.isText()) return TextSection;
@@ -335,11 +351,11 @@
 
 
     std::string Name = SizeSpec + utostr(Align);
-    return getELFSection(Name, MCSectionELF::SHT_PROGBITS,
-                         MCSectionELF::SHF_ALLOC |
-                         MCSectionELF::SHF_MERGE |
-                         MCSectionELF::SHF_STRINGS,
-                         Kind);
+    return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS,
+                                      MCSectionELF::SHF_ALLOC |
+                                      MCSectionELF::SHF_MERGE |
+                                      MCSectionELF::SHF_STRINGS,
+                                      Kind);
   }
 
   if (Kind.isMergeableConst()) {
@@ -391,8 +407,9 @@
 }
 
 const MCExpr *TargetLoweringObjectFileELF::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                             MachineModuleInfo *MMI, unsigned Encoding) const {
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                               MachineModuleInfo *MMI,
+                               unsigned Encoding, MCStreamer &Streamer) const {
 
   if (Encoding & dwarf::DW_EH_PE_indirect) {
     MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
@@ -403,89 +420,61 @@
 
     // Add information about the stub reference to ELFMMI so that the stub
     // gets emitted by the asmprinter.
-    MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
-    MCSymbol *&StubSym = ELFMMI.getGVStubEntry(Sym);
-    if (StubSym == 0) {
-      Name.clear();
-      Mang->getNameWithPrefix(Name, GV, false);
-      StubSym = getContext().GetOrCreateSymbol(Name.str());
+    MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+    MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+    if (StubSym.getPointer() == 0) {
+      MCSymbol *Sym = Mang->getSymbol(GV);
+      StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
     }
 
     return TargetLoweringObjectFile::
-      getSymbolForDwarfReference(Sym, MMI,
-                                 Encoding & ~dwarf::DW_EH_PE_indirect);
+      getExprForDwarfReference(SSym, Mang, MMI,
+                               Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
   }
 
   return TargetLoweringObjectFile::
-    getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
+    getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
 }
 
 //===----------------------------------------------------------------------===//
 //                                 MachO
 //===----------------------------------------------------------------------===//
 
-typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
-
-TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() {
-  // If we have the MachO uniquing map, free it.
-  delete (MachOUniqueMapTy*)UniquingMap;
-}
-
-
-const MCSectionMachO *TargetLoweringObjectFileMachO::
-getMachOSection(StringRef Segment, StringRef Section,
-                unsigned TypeAndAttributes,
-                unsigned Reserved2, SectionKind Kind) const {
-  // We unique sections by their segment/section pair.  The returned section
-  // may not have the same flags as the requested section, if so this should be
-  // diagnosed by the client as an error.
-
-  // Create the map if it doesn't already exist.
-  if (UniquingMap == 0)
-    UniquingMap = new MachOUniqueMapTy();
-  MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap;
-
-  // Form the name to look up.
-  SmallString<64> Name;
-  Name += Segment;
-  Name.push_back(',');
-  Name += Section;
-
-  // Do the lookup, if we have a hit, return it.
-  const MCSectionMachO *&Entry = Map[Name.str()];
-  if (Entry) return Entry;
-
-  // Otherwise, return a new section.
-  return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes,
-                                        Reserved2, Kind, getContext());
-}
-
-
 void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
                                                const TargetMachine &TM) {
-  if (UniquingMap != 0)
-    ((MachOUniqueMapTy*)UniquingMap)->clear();
+  // _foo.eh symbols are currently always exported so that the linker knows
+  // about them.  This is not necessary on 10.6 and later, but it
+  // doesn't hurt anything.
+  // FIXME: I need to get this from Triple.
+  IsFunctionEHSymbolGlobal = true;
+  IsFunctionEHFrameSymbolPrivate = false;
+  SupportsWeakOmittedEHFrame = false;
+  
   TargetLoweringObjectFile::Initialize(Ctx, TM);
 
   TextSection // .text
-    = getMachOSection("__TEXT", "__text",
-                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                      SectionKind::getText());
+    = getContext().getMachOSection("__TEXT", "__text",
+                                   MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                   SectionKind::getText());
   DataSection // .data
-    = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel());
+    = getContext().getMachOSection("__DATA", "__data", 0,
+                                   SectionKind::getDataRel());
 
   CStringSection // .cstring
-    = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS,
-                      SectionKind::getMergeable1ByteCString());
+    = getContext().getMachOSection("__TEXT", "__cstring", 
+                                   MCSectionMachO::S_CSTRING_LITERALS,
+                                   SectionKind::getMergeable1ByteCString());
   UStringSection
-    = getMachOSection("__TEXT","__ustring", 0,
-                      SectionKind::getMergeable2ByteCString());
+    = getContext().getMachOSection("__TEXT","__ustring", 0,
+                                   SectionKind::getMergeable2ByteCString());
   FourByteConstantSection // .literal4
-    = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS,
-                      SectionKind::getMergeableConst4());
+    = getContext().getMachOSection("__TEXT", "__literal4",
+                                   MCSectionMachO::S_4BYTE_LITERALS,
+                                   SectionKind::getMergeableConst4());
   EightByteConstantSection // .literal8
-    = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS,
-                      SectionKind::getMergeableConst8());
+    = getContext().getMachOSection("__TEXT", "__literal8", 
+                                   MCSectionMachO::S_8BYTE_LITERALS,
+                                   SectionKind::getMergeableConst8());
 
   // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
   // to using it in -static mode.
@@ -493,110 +482,130 @@
   if (TM.getRelocationModel() != Reloc::Static &&
       TM.getTargetData()->getPointerSize() == 32)
     SixteenByteConstantSection =   // .literal16
-      getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS,
-                      SectionKind::getMergeableConst16());
+      getContext().getMachOSection("__TEXT", "__literal16",
+                                   MCSectionMachO::S_16BYTE_LITERALS,
+                                   SectionKind::getMergeableConst16());
 
   ReadOnlySection  // .const
-    = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly());
+    = getContext().getMachOSection("__TEXT", "__const", 0,
+                                   SectionKind::getReadOnly());
 
   TextCoalSection
-    = getMachOSection("__TEXT", "__textcoal_nt",
-                      MCSectionMachO::S_COALESCED |
-                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                      SectionKind::getText());
+    = getContext().getMachOSection("__TEXT", "__textcoal_nt",
+                                   MCSectionMachO::S_COALESCED |
+                                   MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                   SectionKind::getText());
   ConstTextCoalSection
-    = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED,
-                      SectionKind::getText());
+    = getContext().getMachOSection("__TEXT", "__const_coal", 
+                                   MCSectionMachO::S_COALESCED,
+                                   SectionKind::getText());
   ConstDataCoalSection
-    = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED,
-                      SectionKind::getText());
+    = getContext().getMachOSection("__DATA","__const_coal",
+                                   MCSectionMachO::S_COALESCED,
+                                   SectionKind::getText());
   ConstDataSection  // .const_data
-    = getMachOSection("__DATA", "__const", 0,
-                      SectionKind::getReadOnlyWithRel());
+    = getContext().getMachOSection("__DATA", "__const", 0,
+                                   SectionKind::getReadOnlyWithRel());
   DataCoalSection
-    = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED,
-                      SectionKind::getDataRel());
+    = getContext().getMachOSection("__DATA","__datacoal_nt", 
+                                   MCSectionMachO::S_COALESCED,
+                                   SectionKind::getDataRel());
   DataCommonSection
-    = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL,
-                      SectionKind::getBSS());
+    = getContext().getMachOSection("__DATA","__common",
+                                   MCSectionMachO::S_ZEROFILL,
+                                   SectionKind::getBSS());
   DataBSSSection
-    = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
-                    SectionKind::getBSS());
+    = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
+                                   SectionKind::getBSS());
   
 
   LazySymbolPointerSection
-    = getMachOSection("__DATA", "__la_symbol_ptr",
-                      MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
-                      SectionKind::getMetadata());
+    = getContext().getMachOSection("__DATA", "__la_symbol_ptr",
+                                   MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
+                                   SectionKind::getMetadata());
   NonLazySymbolPointerSection
-    = getMachOSection("__DATA", "__nl_symbol_ptr",
-                      MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
-                      SectionKind::getMetadata());
+    = getContext().getMachOSection("__DATA", "__nl_symbol_ptr",
+                                   MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+                                   SectionKind::getMetadata());
 
   if (TM.getRelocationModel() == Reloc::Static) {
     StaticCtorSection
-      = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel());
+      = getContext().getMachOSection("__TEXT", "__constructor", 0,
+                                     SectionKind::getDataRel());
     StaticDtorSection
-      = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel());
+      = getContext().getMachOSection("__TEXT", "__destructor", 0,
+                                     SectionKind::getDataRel());
   } else {
     StaticCtorSection
-      = getMachOSection("__DATA", "__mod_init_func",
-                        MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
-                        SectionKind::getDataRel());
+      = getContext().getMachOSection("__DATA", "__mod_init_func",
+                                     MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
+                                     SectionKind::getDataRel());
     StaticDtorSection
-      = getMachOSection("__DATA", "__mod_term_func",
-                        MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
-                        SectionKind::getDataRel());
+      = getContext().getMachOSection("__DATA", "__mod_term_func",
+                                     MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
+                                     SectionKind::getDataRel());
   }
 
   // Exception Handling.
-  LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
-                                SectionKind::getDataRel());
+  LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
+                                             SectionKind::getReadOnlyWithRel());
   EHFrameSection =
-    getMachOSection("__TEXT", "__eh_frame",
-                    MCSectionMachO::S_COALESCED |
-                    MCSectionMachO::S_ATTR_NO_TOC |
-                    MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
-                    MCSectionMachO::S_ATTR_LIVE_SUPPORT,
-                    SectionKind::getReadOnly());
+    getContext().getMachOSection("__TEXT", "__eh_frame",
+                                 MCSectionMachO::S_COALESCED |
+                                 MCSectionMachO::S_ATTR_NO_TOC |
+                                 MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+                                 MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+                                 SectionKind::getReadOnly());
 
   // Debug Information.
   DwarfAbbrevSection =
-    getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_abbrev", 
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfInfoSection =
-    getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_info",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfLineSection =
-    getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_line",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfFrameSection =
-    getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_frame",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfPubNamesSection =
-    getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_pubnames",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfPubTypesSection =
-    getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_pubtypes",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfStrSection =
-    getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_str",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfLocSection =
-    getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_loc",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfARangesSection =
-    getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_aranges",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfRangesSection =
-    getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_ranges",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfMacroInfoSection =
-    getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_macinfo",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
   DwarfDebugInlineSection =
-    getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG,
-                    SectionKind::getMetadata());
+    getContext().getMachOSection("__DWARF", "__debug_inlined",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getMetadata());
 }
 
 const MCSection *TargetLoweringObjectFileMachO::
@@ -609,8 +618,8 @@
     MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
                                           TAA, StubSize);
   if (!ErrorCode.empty()) {
-    // If invalid, report the error with llvm_report_error.
-    llvm_report_error("Global variable '" + GV->getNameStr() +
+    // If invalid, report the error with report_fatal_error.
+    report_fatal_error("Global variable '" + GV->getNameStr() +
                       "' has an invalid section specifier '" + GV->getSection()+
                       "': " + ErrorCode + ".");
     // Fall back to dropping it into the data section.
@@ -619,14 +628,14 @@
 
   // Get the section.
   const MCSectionMachO *S =
-    getMachOSection(Segment, Section, TAA, StubSize, Kind);
+    getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
 
   // Okay, now that we got the section, verify that the TAA & StubSize agree.
   // If the user declared multiple globals with different section flags, we need
   // to reject it here.
   if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
-    // If invalid, report the error with llvm_report_error.
-    llvm_report_error("Global variable '" + GV->getNameStr() +
+    // If invalid, report the error with report_fatal_error.
+    report_fatal_error("Global variable '" + GV->getNameStr() +
                       "' section type or attributes does not match previous"
                       " section specifier");
   }
@@ -725,9 +734,8 @@
     // FIXME: ObjC metadata is currently emitted as internal symbols that have
     // \1L and \0l prefixes on them.  Fix them to be Private/LinkerPrivate and
     // this horrible hack can go away.
-    SmallString<64> Name;
-    Mang->getNameWithPrefix(Name, GV, false);
-    if (Name[0] == 'L' || Name[0] == 'l')
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
       return false;
   }
 
@@ -735,8 +743,9 @@
 }
 
 const MCExpr *TargetLoweringObjectFileMachO::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                             MachineModuleInfo *MMI, unsigned Encoding) const {
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                               MachineModuleInfo *MMI, unsigned Encoding,
+                               MCStreamer &Streamer) const {
   // The mach-o version of this method defaults to returning a stub reference.
 
   if (Encoding & DW_EH_PE_indirect) {
@@ -749,21 +758,20 @@
 
     // Add information about the stub reference to MachOMMI so that the stub
     // gets emitted by the asmprinter.
-    MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
-    MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
-    if (StubSym == 0) {
-      Name.clear();
-      Mang->getNameWithPrefix(Name, GV, false);
-      StubSym = getContext().GetOrCreateSymbol(Name.str());
+    MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+    MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+    if (StubSym.getPointer() == 0) {
+      MCSymbol *Sym = Mang->getSymbol(GV);
+      StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
     }
 
     return TargetLoweringObjectFile::
-      getSymbolForDwarfReference(Sym, MMI,
-                                 Encoding & ~dwarf::DW_EH_PE_indirect);
+      getExprForDwarfReference(SSym, Mang, MMI,
+                               Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
   }
 
   return TargetLoweringObjectFile::
-    getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
+    getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
 }
 
 unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
@@ -779,7 +787,7 @@
 }
 
 unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
-  return DW_EH_PE_absptr;
+  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
 }
 
 //===----------------------------------------------------------------------===//
@@ -797,7 +805,7 @@
 getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const {
   // Create the map if it doesn't already exist.
   if (UniquingMap == 0)
-    UniquingMap = new MachOUniqueMapTy();
+    UniquingMap = new COFFUniqueMapTy();
   COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap;
 
   // Do the lookup, if we have a hit, return it.
@@ -890,7 +898,8 @@
   if (GV->isWeakForLinker()) {
     const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
     SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
-    Mang->getNameWithPrefix(Name, GV, false);
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    Name.append(Sym->getName().begin(), Sym->getName().end());
     return getCOFFSection(Name.str(), false, Kind);
   }
 
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index c840b39..c288ae0 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -188,8 +188,9 @@
 
   // Find the instruction that kills SavedReg.
   MachineInstr *KillMI = NULL;
-  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
-         UE = MRI->use_end(); UI != UE; ++UI) {
+  for (MachineRegisterInfo::use_nodbg_iterator
+         UI = MRI->use_nodbg_begin(SavedReg),
+         UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
     MachineOperand &UseMO = UI.getOperand();
     if (!UseMO.isKill())
       continue;
@@ -280,8 +281,8 @@
                                          MachineInstr *MI, MachineInstr *DefMI,
                                          MachineBasicBlock *MBB, unsigned Loc) {
   bool OtherUse = false;
-  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
-         UE = MRI->use_end(); UI != UE; ++UI) {
+  for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
+         UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
     MachineOperand &UseMO = UI.getOperand();
     MachineInstr *UseMI = UseMO.getParent();
     MachineBasicBlock *UseMBB = UseMI->getParent();
@@ -927,6 +928,7 @@
         mi = nmi;
         continue;
       }
+
       const TargetInstrDesc &TID = mi->getDesc();
       bool FirstTied = true;
 
@@ -1101,7 +1103,7 @@
   // Some remat'ed instructions are dead.
   int VReg = ReMatRegs.find_first();
   while (VReg != -1) {
-    if (MRI->use_empty(VReg)) {
+    if (MRI->use_nodbg_empty(VReg)) {
       MachineInstr *DefMI = MRI->getVRegDef(VReg);
       DefMI->eraseFromParent();
     }
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index b0f0a07..7b33812 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -165,20 +165,8 @@
   }
 
   // Actually remove the blocks now.
-  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = DeadBlocks[i];
-    // If there are any labels in the basic block, unregister them from
-    // MachineModuleInfo.
-    if (MMI && !MBB->empty()) {
-      for (MachineBasicBlock::iterator I = MBB->begin(),
-             E = MBB->end(); I != E; ++I) {
-        if (I->isLabel())
-          // The label ID # is always operand #0, an immediate.
-          MMI->InvalidateLabel(I->getOperand(0).getImm());
-      }
-    }
-    MBB->eraseFromParent();
-  }
+  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+    DeadBlocks[i]->eraseFromParent();
 
   // Cleanup PHI nodes.
   for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 7aa0a91..ac00194 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -9,7 +9,9 @@
 
 #define DEBUG_TYPE "virtregrewriter"
 #include "VirtRegRewriter.h"
+#include "VirtRegMap.h"
 #include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -98,7 +100,7 @@
   bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
                             LiveIntervals* LIs) {
     DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
-    DEBUG(dbgs() << "********** Function: " 
+    DEBUG(dbgs() << "********** Function: "
           << MF.getFunction()->getName() << '\n');
     DEBUG(dbgs() << "**** Machine Instrs"
           << "(NOTE! Does not include spills and reloads!) ****\n");
@@ -135,10 +137,10 @@
         changed |= !reglist.empty();
       }
     }
-    
+
     DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
     DEBUG(MF.dump());
-    
+
     return changed;
   }
 
@@ -208,7 +210,7 @@
   /// in the specified physreg.  If CanClobber is true, the physreg can be
   /// modified at any time without changing the semantics of the program.
   void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
-    // If this stack slot is thought to be available in some other physreg, 
+    // If this stack slot is thought to be available in some other physreg,
     // remove its record.
     ModifyStackSlotOrReMat(SlotOrReMat);
 
@@ -364,7 +366,7 @@
 
   // AssignedPhysReg - The physreg that was assigned for use by the reload.
   unsigned AssignedPhysReg;
-  
+
   // VirtReg - The virtual register itself.
   unsigned VirtReg;
 
@@ -384,11 +386,11 @@
   ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
     PhysRegsClobbered.resize(tri->getNumRegs());
   }
-  
+
   bool hasReuses() const {
     return !Reuses.empty();
   }
-  
+
   /// addReuse - If we choose to reuse a virtual register that is already
   /// available instead of reloading it, remember that we did so.
   void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
@@ -397,9 +399,9 @@
     // If the reload is to the assigned register anyway, no undo will be
     // required.
     if (PhysRegReused == AssignedPhysReg) return;
-    
+
     // Otherwise, remember this.
-    Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused, 
+    Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
                               AssignedPhysReg, VirtReg));
   }
 
@@ -410,10 +412,10 @@
   bool isClobbered(unsigned PhysReg) const {
     return PhysRegsClobbered.test(PhysReg);
   }
-  
+
   /// GetRegForReload - We are about to emit a reload into PhysReg.  If there
   /// is some other operand that is using the specified register, either pick
-  /// a new register to use, or evict the previous reload and use this reg. 
+  /// a new register to use, or evict the previous reload and use this reg.
   unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
                            MachineFunction &MF, MachineInstr *MI,
                            AvailableSpills &Spills,
@@ -525,7 +527,7 @@
 /// reference.
 static bool InvalidateRegDef(MachineBasicBlock::iterator I,
                              MachineInstr &NewDef, unsigned Reg,
-                             bool &HasLiveDef, 
+                             bool &HasLiveDef,
                              const TargetRegisterInfo *TRI) {
   // Due to remat, it's possible this reg isn't being reused. That is,
   // the def of this reg (by prev MI) is now dead.
@@ -572,6 +574,9 @@
 static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
                         BitVector &RegKills,
                         std::vector<MachineOperand*> &KillOps) {
+  // These do not affect kill info at all.
+  if (MI.isDebugValue())
+    return;
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isUse() || MO.isUndef())
@@ -579,7 +584,7 @@
     unsigned Reg = MO.getReg();
     if (Reg == 0)
       continue;
-    
+
     if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
       // That can't be right. Register is killed but not re-defined and it's
       // being reused. Let's fix that.
@@ -597,7 +602,7 @@
       }
     } else {
       // Check for subreg kills as well.
-      // d4 = 
+      // d4 =
       // store d4, fi#0
       // ...
       //    = s8<kill>
@@ -802,7 +807,7 @@
   if (It == SpillSlotsOrReMatsAvailable.end()) return;
   unsigned Reg = It->second >> 1;
   SpillSlotsOrReMatsAvailable.erase(It);
-  
+
   // This register may hold the value of multiple stack slots, only remove this
   // stack slot from the set of values the register contains.
   std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
@@ -832,7 +837,7 @@
                          VirtRegMap &VRM) {
   const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
   const TargetRegisterInfo *TRI = Spills.getRegInfo();
-  
+
   if (Reuses.empty()) return PhysReg;  // This is most often empty.
 
   for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
@@ -853,7 +858,7 @@
     } else {
       // Otherwise, we might also have a problem if a previously reused
       // value aliases the new register. If so, codegen the previous reload
-      // and use this one.          
+      // and use this one.
       unsigned PRRU = Op.PhysRegReused;
       if (TRI->regsOverlap(PRRU, PhysReg)) {
         // Okay, we found out that an alias of a reused register
@@ -890,7 +895,7 @@
 
         bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
         int SSorRMId = DoReMat
-          ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat;
+          ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat;
 
         // Back-schedule reloads and remats.
         MachineBasicBlock::iterator InsertLoc =
@@ -900,13 +905,13 @@
         if (DoReMat) {
           ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
                         TRI, VRM);
-        } else { 
+        } else {
           TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
                                     NewOp.StackSlotOrReMat, AliasRC);
           MachineInstr *LoadMI = prior(InsertLoc);
           VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
           // Any stores to this stack slot are not dead anymore.
-          MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;            
+          MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
           ++NumLoads;
         }
         Spills.ClobberPhysReg(NewPhysReg);
@@ -919,10 +924,10 @@
         Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
         UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
         DEBUG(dbgs() << '\t' << *prior(InsertLoc));
-        
+
         DEBUG(dbgs() << "Reuse undone!\n");
         --NumReused;
-        
+
         // Finally, PhysReg is now available, go ahead and use it.
         return PhysReg;
       }
@@ -987,10 +992,17 @@
   SmallVector<unsigned, 4> Kills;
 
   // Take a look at 2 instructions at most.
-  for (unsigned Count = 0; Count < 2; ++Count) {
+  unsigned Count = 0;
+  while (Count < 2) {
     if (MII == MBB.begin())
       break;
     MachineInstr *PrevMI = prior(MII);
+    MII = PrevMI;
+
+    if (PrevMI->isDebugValue())
+      continue; // Skip over dbg_value instructions.
+    ++Count;
+
     for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = PrevMI->getOperand(i);
       if (!MO.isReg() || MO.getReg() == 0)
@@ -1019,8 +1031,6 @@
       for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
         Uses.set(*AS);
     }
-
-    MII = PrevMI;
   }
 
   return 0;
@@ -1037,1409 +1047,1481 @@
 }
 
 namespace {
-  struct RefSorter {
-    bool operator()(const std::pair<MachineInstr*, int> &A,
-                    const std::pair<MachineInstr*, int> &B) {
-      return A.second < B.second;
-    }
-  };
-}
+
+struct RefSorter {
+  bool operator()(const std::pair<MachineInstr*, int> &A,
+                  const std::pair<MachineInstr*, int> &B) {
+    return A.second < B.second;
+  }
+};
 
 // ***************************** //
 // Local Spiller Implementation  //
 // ***************************** //
 
-namespace {
-
 class LocalRewriter : public VirtRegRewriter {
-  MachineRegisterInfo *RegInfo;
+  MachineRegisterInfo *MRI;
   const TargetRegisterInfo *TRI;
   const TargetInstrInfo *TII;
+  VirtRegMap *VRM;
   BitVector AllocatableRegs;
   DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+  MachineBasicBlock *MBB;       // Basic block currently being processed.
+
 public:
 
   bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
-                            LiveIntervals* LIs) {
-    RegInfo = &MF.getRegInfo(); 
-    TRI = MF.getTarget().getRegisterInfo();
-    TII = MF.getTarget().getInstrInfo();
-    AllocatableRegs = TRI->getAllocatableSet(MF);
-    DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
-          << MF.getFunction()->getName() << "':\n");
-    DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
-                    " reloads!) ****\n");
-    DEBUG(MF.dump());
-
-    // Spills - Keep track of which spilled values are available in physregs
-    // so that we can choose to reuse the physregs instead of emitting
-    // reloads. This is usually refreshed per basic block.
-    AvailableSpills Spills(TRI, TII);
-
-    // Keep track of kill information.
-    BitVector RegKills(TRI->getNumRegs());
-    std::vector<MachineOperand*> KillOps;
-    KillOps.resize(TRI->getNumRegs(), NULL);
-
-    // SingleEntrySuccs - Successor blocks which have a single predecessor.
-    SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
-    SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
-
-    // Traverse the basic blocks depth first.
-    MachineBasicBlock *Entry = MF.begin();
-    SmallPtrSet<MachineBasicBlock*,16> Visited;
-    for (df_ext_iterator<MachineBasicBlock*,
-           SmallPtrSet<MachineBasicBlock*,16> >
-           DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
-         DFI != E; ++DFI) {
-      MachineBasicBlock *MBB = *DFI;
-      if (!EarlyVisited.count(MBB))
-        RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
-
-      // If this MBB is the only predecessor of a successor. Keep the
-      // availability information and visit it next.
-      do {
-        // Keep visiting single predecessor successor as long as possible.
-        SinglePredSuccs.clear();
-        findSinglePredSuccessor(MBB, SinglePredSuccs);
-        if (SinglePredSuccs.empty())
-          MBB = 0;
-        else {
-          // FIXME: More than one successors, each of which has MBB has
-          // the only predecessor.
-          MBB = SinglePredSuccs[0];
-          if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
-            Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
-            RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
-          }
-        }
-      } while (MBB);
-
-      // Clear the availability info.
-      Spills.clear();
-    }
-
-    DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
-    DEBUG(MF.dump());
-
-    // Mark unused spill slots.
-    MachineFrameInfo *MFI = MF.getFrameInfo();
-    int SS = VRM.getLowSpillSlot();
-    if (SS != VirtRegMap::NO_STACK_SLOT)
-      for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS)
-        if (!VRM.isSpillSlotUsed(SS)) {
-          MFI->RemoveStackObject(SS);
-          ++NumDSS;
-        }
-
-    return true;
-  }
+                            LiveIntervals* LIs);
 
 private:
 
-  /// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
-  /// a scratch register is available.
-  ///     xorq  %r12<kill>, %r13
-  ///     addq  %rax, -184(%rbp)
-  ///     addq  %r13, -184(%rbp)
-  /// ==>
-  ///     xorq  %r12<kill>, %r13
-  ///     movq  -184(%rbp), %r12
-  ///     addq  %rax, %r12
-  ///     addq  %r13, %r12
-  ///     movq  %r12, -184(%rbp)
   bool OptimizeByUnfold2(unsigned VirtReg, int SS,
-                         MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MII,
                          std::vector<MachineInstr*> &MaybeDeadStores,
                          AvailableSpills &Spills,
                          BitVector &RegKills,
-                         std::vector<MachineOperand*> &KillOps,
-                         VirtRegMap &VRM) {
+                         std::vector<MachineOperand*> &KillOps);
 
-    MachineBasicBlock::iterator NextMII = llvm::next(MII);
-    if (NextMII == MBB.end())
-      return false;
-
-    if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
-      return false;
-
-    // Now let's see if the last couple of instructions happens to have freed up
-    // a register.
-    const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-    unsigned PhysReg = FindFreeRegister(MII, MBB, RC, TRI, AllocatableRegs);
-    if (!PhysReg)
-      return false;
-
-    MachineFunction &MF = *MBB.getParent();
-    TRI = MF.getTarget().getRegisterInfo();
-    MachineInstr &MI = *MII;
-    if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, VRM))
-      return false;
-
-    // If the next instruction also folds the same SS modref and can be unfoled,
-    // then it's worthwhile to issue a load from SS into the free register and
-    // then unfold these instructions.
-    if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM))
-      return false;
-
-    // Back-schedule reloads and remats.
-    ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, false, SS, TII, MF);
-
-    // Load from SS to the spare physical register.
-    TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC);
-    // This invalidates Phys.
-    Spills.ClobberPhysReg(PhysReg);
-    // Remember it's available.
-    Spills.addAvailable(SS, PhysReg);
-    MaybeDeadStores[SS] = NULL;
-
-    // Unfold current MI.
-    SmallVector<MachineInstr*, 4> NewMIs;
-    if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
-      llvm_unreachable("Unable unfold the load / store folding instruction!");
-    assert(NewMIs.size() == 1);
-    AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
-    VRM.transferRestorePts(&MI, NewMIs[0]);
-    MII = MBB.insert(MII, NewMIs[0]);
-    InvalidateKills(MI, TRI, RegKills, KillOps);
-    VRM.RemoveMachineInstrFromMaps(&MI);
-    MBB.erase(&MI);
-    ++NumModRefUnfold;
-
-    // Unfold next instructions that fold the same SS.
-    do {
-      MachineInstr &NextMI = *NextMII;
-      NextMII = llvm::next(NextMII);
-      NewMIs.clear();
-      if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
-        llvm_unreachable("Unable unfold the load / store folding instruction!");
-      assert(NewMIs.size() == 1);
-      AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
-      VRM.transferRestorePts(&NextMI, NewMIs[0]);
-      MBB.insert(NextMII, NewMIs[0]);
-      InvalidateKills(NextMI, TRI, RegKills, KillOps);
-      VRM.RemoveMachineInstrFromMaps(&NextMI);
-      MBB.erase(&NextMI);
-      ++NumModRefUnfold;
-      if (NextMII == MBB.end())
-        break;
-    } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM));
-
-    // Store the value back into SS.
-    TII->storeRegToStackSlot(MBB, NextMII, PhysReg, true, SS, RC);
-    MachineInstr *StoreMI = prior(NextMII);
-    VRM.addSpillSlotUse(SS, StoreMI);
-    VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
-
-    return true;
-  }
-
-  /// OptimizeByUnfold - Turn a store folding instruction into a load folding
-  /// instruction. e.g.
-  ///     xorl  %edi, %eax
-  ///     movl  %eax, -32(%ebp)
-  ///     movl  -36(%ebp), %eax
-  ///     orl   %eax, -32(%ebp)
-  /// ==>
-  ///     xorl  %edi, %eax
-  ///     orl   -36(%ebp), %eax
-  ///     mov   %eax, -32(%ebp)
-  /// This enables unfolding optimization for a subsequent instruction which will
-  /// also eliminate the newly introduced store instruction.
-  bool OptimizeByUnfold(MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator &MII,
+  bool OptimizeByUnfold(MachineBasicBlock::iterator &MII,
                         std::vector<MachineInstr*> &MaybeDeadStores,
                         AvailableSpills &Spills,
                         BitVector &RegKills,
-                        std::vector<MachineOperand*> &KillOps,
-                        VirtRegMap &VRM) {
-    MachineFunction &MF = *MBB.getParent();
-    MachineInstr &MI = *MII;
-    unsigned UnfoldedOpc = 0;
-    unsigned UnfoldPR = 0;
-    unsigned UnfoldVR = 0;
-    int FoldedSS = VirtRegMap::NO_STACK_SLOT;
-    VirtRegMap::MI2VirtMapTy::const_iterator I, End;
-    for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
-      // Only transform a MI that folds a single register.
-      if (UnfoldedOpc)
-        return false;
-      UnfoldVR = I->second.first;
-      VirtRegMap::ModRef MR = I->second.second;
-      // MI2VirtMap be can updated which invalidate the iterator.
-      // Increment the iterator first.
-      ++I; 
-      if (VRM.isAssignedReg(UnfoldVR))
-        continue;
-      // If this reference is not a use, any previous store is now dead.
-      // Otherwise, the store to this stack slot is not dead anymore.
-      FoldedSS = VRM.getStackSlot(UnfoldVR);
-      MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
-      if (DeadStore && (MR & VirtRegMap::isModRef)) {
-        unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
-        if (!PhysReg || !DeadStore->readsRegister(PhysReg))
-          continue;
-        UnfoldPR = PhysReg;
-        UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
-                                                      false, true);
-      }
-    }
+                        std::vector<MachineOperand*> &KillOps);
 
-    if (!UnfoldedOpc) {
-      if (!UnfoldVR)
-        return false;
-
-      // Look for other unfolding opportunities.
-      return OptimizeByUnfold2(UnfoldVR, FoldedSS, MBB, MII,
-                               MaybeDeadStores, Spills, RegKills, KillOps, VRM);
-    }
-
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI.getOperand(i);
-      if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
-        continue;
-      unsigned VirtReg = MO.getReg();
-      if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
-        continue;
-      if (VRM.isAssignedReg(VirtReg)) {
-        unsigned PhysReg = VRM.getPhys(VirtReg);
-        if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
-          return false;
-      } else if (VRM.isReMaterialized(VirtReg))
-        continue;
-      int SS = VRM.getStackSlot(VirtReg);
-      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
-      if (PhysReg) {
-        if (TRI->regsOverlap(PhysReg, UnfoldPR))
-          return false;
-        continue;
-      }
-      if (VRM.hasPhys(VirtReg)) {
-        PhysReg = VRM.getPhys(VirtReg);
-        if (!TRI->regsOverlap(PhysReg, UnfoldPR))
-          continue;
-      }
-
-      // Ok, we'll need to reload the value into a register which makes
-      // it impossible to perform the store unfolding optimization later.
-      // Let's see if it is possible to fold the load if the store is
-      // unfolded. This allows us to perform the store unfolding
-      // optimization.
-      SmallVector<MachineInstr*, 4> NewMIs;
-      if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
-        assert(NewMIs.size() == 1);
-        MachineInstr *NewMI = NewMIs.back();
-        NewMIs.clear();
-        int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
-        assert(Idx != -1);
-        SmallVector<unsigned, 1> Ops;
-        Ops.push_back(Idx);
-        MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS);
-        if (FoldedMI) {
-          VRM.addSpillSlotUse(SS, FoldedMI);
-          if (!VRM.hasPhys(UnfoldVR))
-            VRM.assignVirt2Phys(UnfoldVR, UnfoldPR);
-          VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
-          MII = MBB.insert(MII, FoldedMI);
-          InvalidateKills(MI, TRI, RegKills, KillOps);
-          VRM.RemoveMachineInstrFromMaps(&MI);
-          MBB.erase(&MI);
-          MF.DeleteMachineInstr(NewMI);
-          return true;
-        }
-        MF.DeleteMachineInstr(NewMI);
-      }
-    }
-
-    return false;
-  }
-
-  /// CommuteChangesDestination - We are looking for r0 = op r1, r2 and
-  /// where SrcReg is r1 and it is tied to r0. Return true if after
-  /// commuting this instruction it will be r0 = op r2, r1.
-  static bool CommuteChangesDestination(MachineInstr *DefMI,
-                                        const TargetInstrDesc &TID,
-                                        unsigned SrcReg,
-                                        const TargetInstrInfo *TII,
-                                        unsigned &DstIdx) {
-    if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3)
-      return false;
-    if (!DefMI->getOperand(1).isReg() ||
-        DefMI->getOperand(1).getReg() != SrcReg)
-      return false;
-    unsigned DefIdx;
-    if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
-      return false;
-    unsigned SrcIdx1, SrcIdx2;
-    if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
-      return false;
-    if (SrcIdx1 == 1 && SrcIdx2 == 2) {
-      DstIdx = 2;
-      return true;
-    }
-    return false;
-  }
-
-  /// CommuteToFoldReload -
-  /// Look for
-  /// r1 = load fi#1
-  /// r1 = op r1, r2<kill>
-  /// store r1, fi#1
-  ///
-  /// If op is commutable and r2 is killed, then we can xform these to
-  /// r2 = op r2, fi#1
-  /// store r2, fi#1
-  bool CommuteToFoldReload(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator &MII,
+  bool CommuteToFoldReload(MachineBasicBlock::iterator &MII,
                            unsigned VirtReg, unsigned SrcReg, int SS,
                            AvailableSpills &Spills,
                            BitVector &RegKills,
                            std::vector<MachineOperand*> &KillOps,
-                           const TargetRegisterInfo *TRI,
-                           VirtRegMap &VRM) {
-    if (MII == MBB.begin() || !MII->killsRegister(SrcReg))
-      return false;
+                           const TargetRegisterInfo *TRI);
 
-    MachineFunction &MF = *MBB.getParent();
-    MachineInstr &MI = *MII;
-    MachineBasicBlock::iterator DefMII = prior(MII);
-    MachineInstr *DefMI = DefMII;
-    const TargetInstrDesc &TID = DefMI->getDesc();
-    unsigned NewDstIdx;
-    if (DefMII != MBB.begin() &&
-        TID.isCommutable() &&
-        CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
-      MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
-      unsigned NewReg = NewDstMO.getReg();
-      if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
-        return false;
-      MachineInstr *ReloadMI = prior(DefMII);
-      int FrameIdx;
-      unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
-      if (DestReg != SrcReg || FrameIdx != SS)
-        return false;
-      int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
-      if (UseIdx == -1)
-        return false;
-      unsigned DefIdx;
-      if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
-        return false;
-      assert(DefMI->getOperand(DefIdx).isReg() &&
-             DefMI->getOperand(DefIdx).getReg() == SrcReg);
-
-      // Now commute def instruction.
-      MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
-      if (!CommutedMI)
-        return false;
-      SmallVector<unsigned, 1> Ops;
-      Ops.push_back(NewDstIdx);
-      MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
-      // Not needed since foldMemoryOperand returns new MI.
-      MF.DeleteMachineInstr(CommutedMI);
-      if (!FoldedMI)
-        return false;
-
-      VRM.addSpillSlotUse(SS, FoldedMI);
-      VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
-      // Insert new def MI and spill MI.
-      const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-      TII->storeRegToStackSlot(MBB, &MI, NewReg, true, SS, RC);
-      MII = prior(MII);
-      MachineInstr *StoreMI = MII;
-      VRM.addSpillSlotUse(SS, StoreMI);
-      VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
-      MII = MBB.insert(MII, FoldedMI);  // Update MII to backtrack.
-
-      // Delete all 3 old instructions.
-      InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
-      VRM.RemoveMachineInstrFromMaps(ReloadMI);
-      MBB.erase(ReloadMI);
-      InvalidateKills(*DefMI, TRI, RegKills, KillOps);
-      VRM.RemoveMachineInstrFromMaps(DefMI);
-      MBB.erase(DefMI);
-      InvalidateKills(MI, TRI, RegKills, KillOps);
-      VRM.RemoveMachineInstrFromMaps(&MI);
-      MBB.erase(&MI);
-
-      // If NewReg was previously holding value of some SS, it's now clobbered.
-      // This has to be done now because it's a physical register. When this
-      // instruction is re-visited, it's ignored.
-      Spills.ClobberPhysReg(NewReg);
-
-      ++NumCommutes;
-      return true;
-    }
-
-    return false;
-  }
-
-  /// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
-  /// the last store to the same slot is now dead. If so, remove the last store.
-  void SpillRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator &MII,
+  void SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
                            int Idx, unsigned PhysReg, int StackSlot,
                            const TargetRegisterClass *RC,
                            bool isAvailable, MachineInstr *&LastStore,
                            AvailableSpills &Spills,
                            SmallSet<MachineInstr*, 4> &ReMatDefs,
                            BitVector &RegKills,
-                           std::vector<MachineOperand*> &KillOps,
-                           VirtRegMap &VRM) {
+                           std::vector<MachineOperand*> &KillOps);
 
-    MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
-    TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
-    MachineInstr *StoreMI = prior(oldNextMII);
-    VRM.addSpillSlotUse(StackSlot, StoreMI);
-    DEBUG(dbgs() << "Store:\t" << *StoreMI);
+  void TransferDeadness(unsigned Reg, BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps);
 
-    // If there is a dead store to this stack slot, nuke it now.
-    if (LastStore) {
-      DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
-      ++NumDSE;
-      SmallVector<unsigned, 2> KillRegs;
-      InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
-      MachineBasicBlock::iterator PrevMII = LastStore;
-      bool CheckDef = PrevMII != MBB.begin();
-      if (CheckDef)
-        --PrevMII;
-      VRM.RemoveMachineInstrFromMaps(LastStore);
-      MBB.erase(LastStore);
-      if (CheckDef) {
-        // Look at defs of killed registers on the store. Mark the defs
-        // as dead since the store has been deleted and they aren't
-        // being reused.
-        for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
-          bool HasOtherDef = false;
-          if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
-            MachineInstr *DeadDef = PrevMII;
-            if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
-              // FIXME: This assumes a remat def does not have side effects.
-              VRM.RemoveMachineInstrFromMaps(DeadDef);
-              MBB.erase(DeadDef);
-              ++NumDRM;
-            }
-          }
+  bool InsertEmergencySpills(MachineInstr *MI);
+
+  bool InsertRestores(MachineInstr *MI,
+                      AvailableSpills &Spills,
+                      BitVector &RegKills,
+                      std::vector<MachineOperand*> &KillOps);
+
+  bool InsertSpills(MachineInstr *MI);
+
+  void RewriteMBB(LiveIntervals *LIs,
+                  AvailableSpills &Spills, BitVector &RegKills,
+                  std::vector<MachineOperand*> &KillOps);
+};
+}
+
+bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
+                                         LiveIntervals* LIs) {
+  MRI = &MF.getRegInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  TII = MF.getTarget().getInstrInfo();
+  VRM = &vrm;
+  AllocatableRegs = TRI->getAllocatableSet(MF);
+  DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
+        << MF.getFunction()->getName() << "':\n");
+  DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
+        " reloads!) ****\n");
+  DEBUG(MF.dump());
+
+  // Spills - Keep track of which spilled values are available in physregs
+  // so that we can choose to reuse the physregs instead of emitting
+  // reloads. This is usually refreshed per basic block.
+  AvailableSpills Spills(TRI, TII);
+
+  // Keep track of kill information.
+  BitVector RegKills(TRI->getNumRegs());
+  std::vector<MachineOperand*> KillOps;
+  KillOps.resize(TRI->getNumRegs(), NULL);
+
+  // SingleEntrySuccs - Successor blocks which have a single predecessor.
+  SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+  SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+  // Traverse the basic blocks depth first.
+  MachineBasicBlock *Entry = MF.begin();
+  SmallPtrSet<MachineBasicBlock*,16> Visited;
+  for (df_ext_iterator<MachineBasicBlock*,
+         SmallPtrSet<MachineBasicBlock*,16> >
+         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+       DFI != E; ++DFI) {
+    MBB = *DFI;
+    if (!EarlyVisited.count(MBB))
+      RewriteMBB(LIs, Spills, RegKills, KillOps);
+
+    // If this MBB is the only predecessor of a successor. Keep the
+    // availability information and visit it next.
+    do {
+      // Keep visiting single predecessor successor as long as possible.
+      SinglePredSuccs.clear();
+      findSinglePredSuccessor(MBB, SinglePredSuccs);
+      if (SinglePredSuccs.empty())
+        MBB = 0;
+      else {
+        // FIXME: More than one successors, each of which has MBB has
+        // the only predecessor.
+        MBB = SinglePredSuccs[0];
+        if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+          Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+          RewriteMBB(LIs, Spills, RegKills, KillOps);
         }
       }
-    }
+    } while (MBB);
 
-    // Allow for multi-instruction spill sequences, as on PPC Altivec.  Presume
-    // the last of multiple instructions is the actual store.
-    LastStore = prior(oldNextMII);
-
-    // If the stack slot value was previously available in some other
-    // register, change it now.  Otherwise, make the register available,
-    // in PhysReg.
-    Spills.ModifyStackSlotOrReMat(StackSlot);
-    Spills.ClobberPhysReg(PhysReg);
-    Spills.addAvailable(StackSlot, PhysReg, isAvailable);
-    ++NumStores;
+    // Clear the availability info.
+    Spills.clear();
   }
 
-  /// isSafeToDelete - Return true if this instruction doesn't produce any side
-  /// effect and all of its defs are dead.
-  static bool isSafeToDelete(MachineInstr &MI) {
-    const TargetInstrDesc &TID = MI.getDesc();
-    if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
-        TID.isCall() || TID.isBarrier() || TID.isReturn() ||
-        TID.hasUnmodeledSideEffects())
+  DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+  DEBUG(MF.dump());
+
+  // Mark unused spill slots.
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  int SS = VRM->getLowSpillSlot();
+  if (SS != VirtRegMap::NO_STACK_SLOT)
+    for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS)
+      if (!VRM->isSpillSlotUsed(SS)) {
+        MFI->RemoveStackObject(SS);
+        ++NumDSS;
+      }
+
+  return true;
+}
+
+/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+/// a scratch register is available.
+///     xorq  %r12<kill>, %r13
+///     addq  %rax, -184(%rbp)
+///     addq  %r13, -184(%rbp)
+/// ==>
+///     xorq  %r12<kill>, %r13
+///     movq  -184(%rbp), %r12
+///     addq  %rax, %r12
+///     addq  %r13, %r12
+///     movq  %r12, -184(%rbp)
+bool LocalRewriter::
+OptimizeByUnfold2(unsigned VirtReg, int SS,
+                  MachineBasicBlock::iterator &MII,
+                  std::vector<MachineInstr*> &MaybeDeadStores,
+                  AvailableSpills &Spills,
+                  BitVector &RegKills,
+                  std::vector<MachineOperand*> &KillOps) {
+
+  MachineBasicBlock::iterator NextMII = llvm::next(MII);
+  // Skip over dbg_value instructions.
+  while (NextMII != MBB->end() && NextMII->isDebugValue())
+    NextMII = llvm::next(NextMII);
+  if (NextMII == MBB->end())
+    return false;
+
+  if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+    return false;
+
+  // Now let's see if the last couple of instructions happens to have freed up
+  // a register.
+  const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+  unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs);
+  if (!PhysReg)
+    return false;
+
+  MachineFunction &MF = *MBB->getParent();
+  TRI = MF.getTarget().getRegisterInfo();
+  MachineInstr &MI = *MII;
+  if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM))
+    return false;
+
+  // If the next instruction also folds the same SS modref and can be unfoled,
+  // then it's worthwhile to issue a load from SS into the free register and
+  // then unfold these instructions.
+  if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM))
+    return false;
+
+  // Back-schedule reloads and remats.
+  ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
+
+  // Load from SS to the spare physical register.
+  TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC);
+  // This invalidates Phys.
+  Spills.ClobberPhysReg(PhysReg);
+  // Remember it's available.
+  Spills.addAvailable(SS, PhysReg);
+  MaybeDeadStores[SS] = NULL;
+
+  // Unfold current MI.
+  SmallVector<MachineInstr*, 4> NewMIs;
+  if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+    llvm_unreachable("Unable unfold the load / store folding instruction!");
+  assert(NewMIs.size() == 1);
+  AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+  VRM->transferRestorePts(&MI, NewMIs[0]);
+  MII = MBB->insert(MII, NewMIs[0]);
+  InvalidateKills(MI, TRI, RegKills, KillOps);
+  VRM->RemoveMachineInstrFromMaps(&MI);
+  MBB->erase(&MI);
+  ++NumModRefUnfold;
+
+  // Unfold next instructions that fold the same SS.
+  do {
+    MachineInstr &NextMI = *NextMII;
+    NextMII = llvm::next(NextMII);
+    NewMIs.clear();
+    if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+      llvm_unreachable("Unable unfold the load / store folding instruction!");
+    assert(NewMIs.size() == 1);
+    AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+    VRM->transferRestorePts(&NextMI, NewMIs[0]);
+    MBB->insert(NextMII, NewMIs[0]);
+    InvalidateKills(NextMI, TRI, RegKills, KillOps);
+    VRM->RemoveMachineInstrFromMaps(&NextMI);
+    MBB->erase(&NextMI);
+    ++NumModRefUnfold;
+    // Skip over dbg_value instructions.
+    while (NextMII != MBB->end() && NextMII->isDebugValue())
+      NextMII = llvm::next(NextMII);
+    if (NextMII == MBB->end())
+      break;
+  } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
+
+  // Store the value back into SS.
+  TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC);
+  MachineInstr *StoreMI = prior(NextMII);
+  VRM->addSpillSlotUse(SS, StoreMI);
+  VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+  return true;
+}
+
+/// OptimizeByUnfold - Turn a store folding instruction into a load folding
+/// instruction. e.g.
+///     xorl  %edi, %eax
+///     movl  %eax, -32(%ebp)
+///     movl  -36(%ebp), %eax
+///     orl   %eax, -32(%ebp)
+/// ==>
+///     xorl  %edi, %eax
+///     orl   -36(%ebp), %eax
+///     mov   %eax, -32(%ebp)
+/// This enables unfolding optimization for a subsequent instruction which will
+/// also eliminate the newly introduced store instruction.
+bool LocalRewriter::
+OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+                 std::vector<MachineInstr*> &MaybeDeadStores,
+                 AvailableSpills &Spills,
+                 BitVector &RegKills,
+                 std::vector<MachineOperand*> &KillOps) {
+  MachineFunction &MF = *MBB->getParent();
+  MachineInstr &MI = *MII;
+  unsigned UnfoldedOpc = 0;
+  unsigned UnfoldPR = 0;
+  unsigned UnfoldVR = 0;
+  int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+  VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+  for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
+    // Only transform a MI that folds a single register.
+    if (UnfoldedOpc)
       return false;
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI.getOperand(i);
-      if (!MO.isReg() || !MO.getReg())
+    UnfoldVR = I->second.first;
+    VirtRegMap::ModRef MR = I->second.second;
+    // MI2VirtMap be can updated which invalidate the iterator.
+    // Increment the iterator first.
+    ++I;
+    if (VRM->isAssignedReg(UnfoldVR))
+      continue;
+    // If this reference is not a use, any previous store is now dead.
+    // Otherwise, the store to this stack slot is not dead anymore.
+    FoldedSS = VRM->getStackSlot(UnfoldVR);
+    MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+    if (DeadStore && (MR & VirtRegMap::isModRef)) {
+      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+      if (!PhysReg || !DeadStore->readsRegister(PhysReg))
         continue;
-      if (MO.isDef() && !MO.isDead())
-        return false;
-      if (MO.isUse() && MO.isKill())
-        // FIXME: We can't remove kill markers or else the scavenger will assert.
-        // An alternative is to add a ADD pseudo instruction to replace kill
-        // markers.
-        return false;
+      UnfoldPR = PhysReg;
+      UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+                                                    false, true);
     }
+  }
+
+  if (!UnfoldedOpc) {
+    if (!UnfoldVR)
+      return false;
+
+    // Look for other unfolding opportunities.
+    return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills,
+                             RegKills, KillOps);
+  }
+
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
+      continue;
+    unsigned VirtReg = MO.getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
+      continue;
+    if (VRM->isAssignedReg(VirtReg)) {
+      unsigned PhysReg = VRM->getPhys(VirtReg);
+      if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
+        return false;
+    } else if (VRM->isReMaterialized(VirtReg))
+      continue;
+    int SS = VRM->getStackSlot(VirtReg);
+    unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+    if (PhysReg) {
+      if (TRI->regsOverlap(PhysReg, UnfoldPR))
+        return false;
+      continue;
+    }
+    if (VRM->hasPhys(VirtReg)) {
+      PhysReg = VRM->getPhys(VirtReg);
+      if (!TRI->regsOverlap(PhysReg, UnfoldPR))
+        continue;
+    }
+
+    // Ok, we'll need to reload the value into a register which makes
+    // it impossible to perform the store unfolding optimization later.
+    // Let's see if it is possible to fold the load if the store is
+    // unfolded. This allows us to perform the store unfolding
+    // optimization.
+    SmallVector<MachineInstr*, 4> NewMIs;
+    if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
+      assert(NewMIs.size() == 1);
+      MachineInstr *NewMI = NewMIs.back();
+      NewMIs.clear();
+      int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
+      assert(Idx != -1);
+      SmallVector<unsigned, 1> Ops;
+      Ops.push_back(Idx);
+      MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS);
+      if (FoldedMI) {
+        VRM->addSpillSlotUse(SS, FoldedMI);
+        if (!VRM->hasPhys(UnfoldVR))
+          VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
+        VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+        MII = MBB->insert(MII, FoldedMI);
+        InvalidateKills(MI, TRI, RegKills, KillOps);
+        VRM->RemoveMachineInstrFromMaps(&MI);
+        MBB->erase(&MI);
+        MF.DeleteMachineInstr(NewMI);
+        return true;
+      }
+      MF.DeleteMachineInstr(NewMI);
+    }
+  }
+
+  return false;
+}
+
+/// CommuteChangesDestination - We are looking for r0 = op r1, r2 and
+/// where SrcReg is r1 and it is tied to r0. Return true if after
+/// commuting this instruction it will be r0 = op r2, r1.
+static bool CommuteChangesDestination(MachineInstr *DefMI,
+                                      const TargetInstrDesc &TID,
+                                      unsigned SrcReg,
+                                      const TargetInstrInfo *TII,
+                                      unsigned &DstIdx) {
+  if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3)
+    return false;
+  if (!DefMI->getOperand(1).isReg() ||
+      DefMI->getOperand(1).getReg() != SrcReg)
+    return false;
+  unsigned DefIdx;
+  if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
+    return false;
+  unsigned SrcIdx1, SrcIdx2;
+  if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
+    return false;
+  if (SrcIdx1 == 1 && SrcIdx2 == 2) {
+    DstIdx = 2;
+    return true;
+  }
+  return false;
+}
+
+/// CommuteToFoldReload -
+/// Look for
+/// r1 = load fi#1
+/// r1 = op r1, r2<kill>
+/// store r1, fi#1
+///
+/// If op is commutable and r2 is killed, then we can xform these to
+/// r2 = op r2, fi#1
+/// store r2, fi#1
+bool LocalRewriter::
+CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+                    unsigned VirtReg, unsigned SrcReg, int SS,
+                    AvailableSpills &Spills,
+                    BitVector &RegKills,
+                    std::vector<MachineOperand*> &KillOps,
+                    const TargetRegisterInfo *TRI) {
+  if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
+    return false;
+
+  MachineFunction &MF = *MBB->getParent();
+  MachineInstr &MI = *MII;
+  MachineBasicBlock::iterator DefMII = prior(MII);
+  MachineInstr *DefMI = DefMII;
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  unsigned NewDstIdx;
+  if (DefMII != MBB->begin() &&
+      TID.isCommutable() &&
+      CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
+    MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+    unsigned NewReg = NewDstMO.getReg();
+    if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+      return false;
+    MachineInstr *ReloadMI = prior(DefMII);
+    int FrameIdx;
+    unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+    if (DestReg != SrcReg || FrameIdx != SS)
+      return false;
+    int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+    if (UseIdx == -1)
+      return false;
+    unsigned DefIdx;
+    if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
+      return false;
+    assert(DefMI->getOperand(DefIdx).isReg() &&
+           DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+    // Now commute def instruction.
+    MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
+    if (!CommutedMI)
+      return false;
+    SmallVector<unsigned, 1> Ops;
+    Ops.push_back(NewDstIdx);
+    MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+    // Not needed since foldMemoryOperand returns new MI.
+    MF.DeleteMachineInstr(CommutedMI);
+    if (!FoldedMI)
+      return false;
+
+    VRM->addSpillSlotUse(SS, FoldedMI);
+    VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+    // Insert new def MI and spill MI.
+    const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+    TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC);
+    MII = prior(MII);
+    MachineInstr *StoreMI = MII;
+    VRM->addSpillSlotUse(SS, StoreMI);
+    VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+    MII = MBB->insert(MII, FoldedMI);  // Update MII to backtrack.
+
+    // Delete all 3 old instructions.
+    InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
+    VRM->RemoveMachineInstrFromMaps(ReloadMI);
+    MBB->erase(ReloadMI);
+    InvalidateKills(*DefMI, TRI, RegKills, KillOps);
+    VRM->RemoveMachineInstrFromMaps(DefMI);
+    MBB->erase(DefMI);
+    InvalidateKills(MI, TRI, RegKills, KillOps);
+    VRM->RemoveMachineInstrFromMaps(&MI);
+    MBB->erase(&MI);
+
+    // If NewReg was previously holding value of some SS, it's now clobbered.
+    // This has to be done now because it's a physical register. When this
+    // instruction is re-visited, it's ignored.
+    Spills.ClobberPhysReg(NewReg);
+
+    ++NumCommutes;
     return true;
   }
 
-  /// TransferDeadness - A identity copy definition is dead and it's being
-  /// removed. Find the last def or use and mark it as dead / kill.
-  void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
-                        unsigned Reg, BitVector &RegKills,
-                        std::vector<MachineOperand*> &KillOps,
-                        VirtRegMap &VRM) {
-    SmallPtrSet<MachineInstr*, 4> Seens;
-    SmallVector<std::pair<MachineInstr*, int>,8> Refs;
-    for (MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(Reg),
-           RE = RegInfo->reg_end(); RI != RE; ++RI) {
-      MachineInstr *UDMI = &*RI;
-      if (UDMI->getParent() != MBB)
-        continue;
-      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
-      if (DI == DistanceMap.end() || DI->second > CurDist)
-        continue;
-      if (Seens.insert(UDMI))
-        Refs.push_back(std::make_pair(UDMI, DI->second));
-    }
+  return false;
+}
 
-    if (Refs.empty())
-      return;
-    std::sort(Refs.begin(), Refs.end(), RefSorter());
+/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
+/// the last store to the same slot is now dead. If so, remove the last store.
+void LocalRewriter::
+SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+                    int Idx, unsigned PhysReg, int StackSlot,
+                    const TargetRegisterClass *RC,
+                    bool isAvailable, MachineInstr *&LastStore,
+                    AvailableSpills &Spills,
+                    SmallSet<MachineInstr*, 4> &ReMatDefs,
+                    BitVector &RegKills,
+                    std::vector<MachineOperand*> &KillOps) {
 
-    while (!Refs.empty()) {
-      MachineInstr *LastUDMI = Refs.back().first;
-      Refs.pop_back();
+  MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+  TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
+  MachineInstr *StoreMI = prior(oldNextMII);
+  VRM->addSpillSlotUse(StackSlot, StoreMI);
+  DEBUG(dbgs() << "Store:\t" << *StoreMI);
 
-      MachineOperand *LastUD = NULL;
-      for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
-        MachineOperand &MO = LastUDMI->getOperand(i);
-        if (!MO.isReg() || MO.getReg() != Reg)
-          continue;
-        if (!LastUD || (LastUD->isUse() && MO.isDef()))
-          LastUD = &MO;
-        if (LastUDMI->isRegTiedToDefOperand(i))
-          break;
-      }
-      if (LastUD->isDef()) {
-        // If the instruction has no side effect, delete it and propagate
-        // backward further. Otherwise, mark is dead and we are done.
-        if (!isSafeToDelete(*LastUDMI)) {
-          LastUD->setIsDead();
-          break;
+  // If there is a dead store to this stack slot, nuke it now.
+  if (LastStore) {
+    DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
+    ++NumDSE;
+    SmallVector<unsigned, 2> KillRegs;
+    InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+    MachineBasicBlock::iterator PrevMII = LastStore;
+    bool CheckDef = PrevMII != MBB->begin();
+    if (CheckDef)
+      --PrevMII;
+    VRM->RemoveMachineInstrFromMaps(LastStore);
+    MBB->erase(LastStore);
+    if (CheckDef) {
+      // Look at defs of killed registers on the store. Mark the defs
+      // as dead since the store has been deleted and they aren't
+      // being reused.
+      for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+        bool HasOtherDef = false;
+        if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
+          MachineInstr *DeadDef = PrevMII;
+          if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+            // FIXME: This assumes a remat def does not have side effects.
+            VRM->RemoveMachineInstrFromMaps(DeadDef);
+            MBB->erase(DeadDef);
+            ++NumDRM;
+          }
         }
-        VRM.RemoveMachineInstrFromMaps(LastUDMI);
-        MBB->erase(LastUDMI);
-      } else {
-        LastUD->setIsKill();
-        RegKills.set(Reg);
-        KillOps[Reg] = LastUD;
-        break;
       }
     }
   }
 
-  /// rewriteMBB - Keep track of which spills are available even after the
-  /// register allocator is done with them.  If possible, avid reloading vregs.
-  void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
-                  LiveIntervals *LIs,
-                  AvailableSpills &Spills, BitVector &RegKills,
-                  std::vector<MachineOperand*> &KillOps) {
+  // Allow for multi-instruction spill sequences, as on PPC Altivec.  Presume
+  // the last of multiple instructions is the actual store.
+  LastStore = prior(oldNextMII);
 
-    DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
-          << MBB.getName() << "':\n");
+  // If the stack slot value was previously available in some other
+  // register, change it now.  Otherwise, make the register available,
+  // in PhysReg.
+  Spills.ModifyStackSlotOrReMat(StackSlot);
+  Spills.ClobberPhysReg(PhysReg);
+  Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+  ++NumStores;
+}
 
-    MachineFunction &MF = *MBB.getParent();
-    
-    // MaybeDeadStores - When we need to write a value back into a stack slot,
-    // keep track of the inserted store.  If the stack slot value is never read
-    // (because the value was used from some available register, for example), and
-    // subsequently stored to, the original store is dead.  This map keeps track
-    // of inserted stores that are not used.  If we see a subsequent store to the
-    // same stack slot, the original store is deleted.
-    std::vector<MachineInstr*> MaybeDeadStores;
-    MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+/// isSafeToDelete - Return true if this instruction doesn't produce any side
+/// effect and all of its defs are dead.
+static bool isSafeToDelete(MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+      TID.isCall() || TID.isBarrier() || TID.isReturn() ||
+      TID.hasUnmodeledSideEffects())
+    return false;
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+    if (MO.isDef() && !MO.isDead())
+      return false;
+    if (MO.isUse() && MO.isKill())
+      // FIXME: We can't remove kill markers or else the scavenger will assert.
+      // An alternative is to add a ADD pseudo instruction to replace kill
+      // markers.
+      return false;
+  }
+  return true;
+}
 
-    // ReMatDefs - These are rematerializable def MIs which are not deleted.
-    SmallSet<MachineInstr*, 4> ReMatDefs;
+/// TransferDeadness - A identity copy definition is dead and it's being
+/// removed. Find the last def or use and mark it as dead / kill.
+void LocalRewriter::
+TransferDeadness(unsigned Reg, BitVector &RegKills,
+                 std::vector<MachineOperand*> &KillOps) {
+  SmallPtrSet<MachineInstr*, 4> Seens;
+  SmallVector<std::pair<MachineInstr*, int>,8> Refs;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+         RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineInstr *UDMI = &*RI;
+    if (UDMI->isDebugValue() || UDMI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (Seens.insert(UDMI))
+      Refs.push_back(std::make_pair(UDMI, DI->second));
+  }
 
-    // Clear kill info.
-    SmallSet<unsigned, 2> KilledMIRegs;
-    RegKills.reset();
-    KillOps.clear();
-    KillOps.resize(TRI->getNumRegs(), NULL);
+  if (Refs.empty())
+    return;
+  std::sort(Refs.begin(), Refs.end(), RefSorter());
 
-    unsigned Dist = 0;
-    DistanceMap.clear();
-    for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
-         MII != E; ) {
-      MachineBasicBlock::iterator NextMII = llvm::next(MII);
+  while (!Refs.empty()) {
+    MachineInstr *LastUDMI = Refs.back().first;
+    Refs.pop_back();
 
-      VirtRegMap::MI2VirtMapTy::const_iterator I, End;
-      bool Erased = false;
-      bool BackTracked = false;
-      if (OptimizeByUnfold(MBB, MII,
-                           MaybeDeadStores, Spills, RegKills, KillOps, VRM))
-        NextMII = llvm::next(MII);
+    MachineOperand *LastUD = NULL;
+    for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = LastUDMI->getOperand(i);
+      if (!MO.isReg() || MO.getReg() != Reg)
+        continue;
+      if (!LastUD || (LastUD->isUse() && MO.isDef()))
+        LastUD = &MO;
+      if (LastUDMI->isRegTiedToDefOperand(i))
+        break;
+    }
+    if (LastUD->isDef()) {
+      // If the instruction has no side effect, delete it and propagate
+      // backward further. Otherwise, mark is dead and we are done.
+      if (!isSafeToDelete(*LastUDMI)) {
+        LastUD->setIsDead();
+        break;
+      }
+      VRM->RemoveMachineInstrFromMaps(LastUDMI);
+      MBB->erase(LastUDMI);
+    } else {
+      LastUD->setIsKill();
+      RegKills.set(Reg);
+      KillOps[Reg] = LastUD;
+      break;
+    }
+  }
+}
 
-      MachineInstr &MI = *MII;
+/// InsertEmergencySpills - Insert emergency spills before MI if requested by
+/// VRM. Return true if spills were inserted.
+bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
+  if (!VRM->hasEmergencySpills(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  SmallSet<int, 4> UsedSS;
+  std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
+  for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+    unsigned PhysReg = EmSpills[i];
+    const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(PhysReg);
+    assert(RC && "Unable to determine register class!");
+    int SS = VRM->getEmergencySpillSlot(RC);
+    if (UsedSS.count(SS))
+      llvm_unreachable("Need to spill more than one physical registers!");
+    UsedSS.insert(SS);
+    TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC);
+    MachineInstr *StoreMI = prior(MII);
+    VRM->addSpillSlotUse(SS, StoreMI);
 
-      if (VRM.hasEmergencySpills(&MI)) {
-        // Spill physical register(s) in the rare case the allocator has run out
-        // of registers to allocate.
-        SmallSet<int, 4> UsedSS;
-        std::vector<unsigned> &EmSpills = VRM.getEmergencySpills(&MI);
-        for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
-          unsigned PhysReg = EmSpills[i];
-          const TargetRegisterClass *RC =
-            TRI->getPhysicalRegisterRegClass(PhysReg);
-          assert(RC && "Unable to determine register class!");
-          int SS = VRM.getEmergencySpillSlot(RC);
-          if (UsedSS.count(SS))
-            llvm_unreachable("Need to spill more than one physical registers!");
-          UsedSS.insert(SS);
-          TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC);
-          MachineInstr *StoreMI = prior(MII);
-          VRM.addSpillSlotUse(SS, StoreMI);
+    // Back-schedule reloads and remats.
+    MachineBasicBlock::iterator InsertLoc =
+      ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
+                       TII, *MBB->getParent());
 
-          // Back-schedule reloads and remats.
-          MachineBasicBlock::iterator InsertLoc =
-            ComputeReloadLoc(llvm::next(MII), MBB.begin(), PhysReg, TRI, false,
-                             SS, TII, MF);
+    TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC);
 
-          TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC);
+    MachineInstr *LoadMI = prior(InsertLoc);
+    VRM->addSpillSlotUse(SS, LoadMI);
+    ++NumPSpills;
+    DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+  }
+  return true;
+}
 
-          MachineInstr *LoadMI = prior(InsertLoc);
-          VRM.addSpillSlotUse(SS, LoadMI);
-          ++NumPSpills;
-          DistanceMap.insert(std::make_pair(LoadMI, Dist++));
-        }
-        NextMII = llvm::next(MII);
+/// InsertRestores - Restore registers before MI is requested by VRM. Return
+/// true is any instructions were inserted.
+bool LocalRewriter::InsertRestores(MachineInstr *MI,
+                                   AvailableSpills &Spills,
+                                   BitVector &RegKills,
+                                   std::vector<MachineOperand*> &KillOps) {
+  if (!VRM->isRestorePt(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI);
+  for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+    unsigned VirtReg = RestoreRegs[e-i-1];  // Reverse order.
+    if (!VRM->getPreSplitReg(VirtReg))
+      continue; // Split interval spilled again.
+    unsigned Phys = VRM->getPhys(VirtReg);
+    MRI->setPhysRegUsed(Phys);
+
+    // Check if the value being restored if available. If so, it must be
+    // from a predecessor BB that fallthrough into this BB. We do not
+    // expect:
+    // BB1:
+    // r1 = load fi#1
+    // ...
+    //    = r1<kill>
+    // ... # r1 not clobbered
+    // ...
+    //    = load fi#1
+    bool DoReMat = VRM->isReMaterialized(VirtReg);
+    int SSorRMId = DoReMat
+      ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+    const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+    unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+    if (InReg == Phys) {
+      // If the value is already available in the expected register, save
+      // a reload / remat.
+      if (SSorRMId)
+        DEBUG(dbgs() << "Reusing RM#"
+                     << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+      else
+        DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+      DEBUG(dbgs() << " from physreg "
+                   << TRI->getName(InReg) << " for vreg"
+                   << VirtReg <<" instead of reloading into physreg "
+                   << TRI->getName(Phys) << '\n');
+      ++NumOmitted;
+      continue;
+    } else if (InReg && InReg != Phys) {
+      if (SSorRMId)
+        DEBUG(dbgs() << "Reusing RM#"
+                     << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+      else
+        DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+      DEBUG(dbgs() << " from physreg "
+                   << TRI->getName(InReg) << " for vreg"
+                   << VirtReg <<" by copying it into physreg "
+                   << TRI->getName(Phys) << '\n');
+
+      // If the reloaded / remat value is available in another register,
+      // copy it to the desired register.
+
+      // Back-schedule reloads and remats.
+      MachineBasicBlock::iterator InsertLoc =
+        ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+                         *MBB->getParent());
+
+      TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC);
+
+      // This invalidates Phys.
+      Spills.ClobberPhysReg(Phys);
+      // Remember it's available.
+      Spills.addAvailable(SSorRMId, Phys);
+
+      // Mark is killed.
+      MachineInstr *CopyMI = prior(InsertLoc);
+      CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+      MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
+      KillOpnd->setIsKill();
+      UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+      DEBUG(dbgs() << '\t' << *CopyMI);
+      ++NumCopified;
+      continue;
+    }
+
+    // Back-schedule reloads and remats.
+    MachineBasicBlock::iterator InsertLoc =
+      ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+                       *MBB->getParent());
+
+    if (VRM->isReMaterialized(VirtReg)) {
+      ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
+    } else {
+      const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+      TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC);
+      MachineInstr *LoadMI = prior(InsertLoc);
+      VRM->addSpillSlotUse(SSorRMId, LoadMI);
+      ++NumLoads;
+      DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+    }
+
+    // This invalidates Phys.
+    Spills.ClobberPhysReg(Phys);
+    // Remember it's available.
+    Spills.addAvailable(SSorRMId, Phys);
+
+    UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+    DEBUG(dbgs() << '\t' << *prior(MII));
+  }
+  return true;
+}
+
+/// InsertEmergencySpills - Insert spills after MI if requested by VRM. Return
+/// true if spills were inserted.
+bool LocalRewriter::InsertSpills(MachineInstr *MI) {
+  if (!VRM->isSpillPt(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  std::vector<std::pair<unsigned,bool> > &SpillRegs =
+    VRM->getSpillPtSpills(MI);
+  for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+    unsigned VirtReg = SpillRegs[i].first;
+    bool isKill = SpillRegs[i].second;
+    if (!VRM->getPreSplitReg(VirtReg))
+      continue; // Split interval spilled again.
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+    unsigned Phys = VRM->getPhys(VirtReg);
+    int StackSlot = VRM->getStackSlot(VirtReg);
+    MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+    TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
+                             RC);
+    MachineInstr *StoreMI = prior(oldNextMII);
+    VRM->addSpillSlotUse(StackSlot, StoreMI);
+    DEBUG(dbgs() << "Store:\t" << *StoreMI);
+    VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+  }
+  return true;
+}
+
+
+/// rewriteMBB - Keep track of which spills are available even after the
+/// register allocator is done with them.  If possible, avid reloading vregs.
+void
+LocalRewriter::RewriteMBB(LiveIntervals *LIs,
+                          AvailableSpills &Spills, BitVector &RegKills,
+                          std::vector<MachineOperand*> &KillOps) {
+
+  DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
+               << MBB->getName() << "':\n");
+
+  MachineFunction &MF = *MBB->getParent();
+
+  // MaybeDeadStores - When we need to write a value back into a stack slot,
+  // keep track of the inserted store.  If the stack slot value is never read
+  // (because the value was used from some available register, for example), and
+  // subsequently stored to, the original store is dead.  This map keeps track
+  // of inserted stores that are not used.  If we see a subsequent store to the
+  // same stack slot, the original store is deleted.
+  std::vector<MachineInstr*> MaybeDeadStores;
+  MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+  // ReMatDefs - These are rematerializable def MIs which are not deleted.
+  SmallSet<MachineInstr*, 4> ReMatDefs;
+
+  // Clear kill info.
+  SmallSet<unsigned, 2> KilledMIRegs;
+  RegKills.reset();
+  KillOps.clear();
+  KillOps.resize(TRI->getNumRegs(), NULL);
+
+  DistanceMap.clear();
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E; ) {
+    MachineBasicBlock::iterator NextMII = llvm::next(MII);
+
+    if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps))
+      NextMII = llvm::next(MII);
+
+    if (InsertEmergencySpills(MII))
+      NextMII = llvm::next(MII);
+
+    InsertRestores(MII, Spills, RegKills, KillOps);
+
+    if (InsertSpills(MII))
+      NextMII = llvm::next(MII);
+
+    VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+    bool Erased = false;
+    bool BackTracked = false;
+    MachineInstr &MI = *MII;
+
+    /// ReusedOperands - Keep track of operand reuse in case we need to undo
+    /// reuse.
+    ReuseInfo ReusedOperands(MI, TRI);
+    SmallVector<unsigned, 4> VirtUseOps;
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (!MO.isReg() || MO.getReg() == 0)
+        continue;   // Ignore non-register operands.
+
+      unsigned VirtReg = MO.getReg();
+      if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+        // Ignore physregs for spilling, but remember that it is used by this
+        // function.
+        MRI->setPhysRegUsed(VirtReg);
+        continue;
       }
 
-      // Insert restores here if asked to.
-      if (VRM.isRestorePt(&MI)) {
-        std::vector<unsigned> &RestoreRegs = VRM.getRestorePtRestores(&MI);
-        for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
-          unsigned VirtReg = RestoreRegs[e-i-1];  // Reverse order.
-          if (!VRM.getPreSplitReg(VirtReg))
-            continue; // Split interval spilled again.
-          unsigned Phys = VRM.getPhys(VirtReg);
-          RegInfo->setPhysRegUsed(Phys);
+      // We want to process implicit virtual register uses first.
+      if (MO.isImplicit())
+        // If the virtual register is implicitly defined, emit a implicit_def
+        // before so scavenger knows it's "defined".
+        // FIXME: This is a horrible hack done the by register allocator to
+        // remat a definition with virtual register operand.
+        VirtUseOps.insert(VirtUseOps.begin(), i);
+      else
+        VirtUseOps.push_back(i);
+    }
 
-          // Check if the value being restored if available. If so, it must be
-          // from a predecessor BB that fallthrough into this BB. We do not
-          // expect:
-          // BB1:
-          // r1 = load fi#1
-          // ...
-          //    = r1<kill>
-          // ... # r1 not clobbered
-          // ...
-          //    = load fi#1
-          bool DoReMat = VRM.isReMaterialized(VirtReg);
-          int SSorRMId = DoReMat
-            ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
-          const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-          unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-          if (InReg == Phys) {
-            // If the value is already available in the expected register, save
-            // a reload / remat.
-            if (SSorRMId)
-              DEBUG(dbgs() << "Reusing RM#"
-                           << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
-            else
-              DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
-            DEBUG(dbgs() << " from physreg "
-                         << TRI->getName(InReg) << " for vreg"
-                         << VirtReg <<" instead of reloading into physreg "
-                         << TRI->getName(Phys) << '\n');
-            ++NumOmitted;
-            continue;
-          } else if (InReg && InReg != Phys) {
-            if (SSorRMId)
-              DEBUG(dbgs() << "Reusing RM#"
-                           << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
-            else
-              DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
-            DEBUG(dbgs() << " from physreg "
-                         << TRI->getName(InReg) << " for vreg"
-                         << VirtReg <<" by copying it into physreg "
-                         << TRI->getName(Phys) << '\n');
+    // Process all of the spilled uses and all non spilled reg references.
+    SmallVector<int, 2> PotentialDeadStoreSlots;
+    KilledMIRegs.clear();
+    for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+      unsigned i = VirtUseOps[j];
+      unsigned VirtReg = MI.getOperand(i).getReg();
+      assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+             "Not a virtual register?");
 
-            // If the reloaded / remat value is available in another register,
-            // copy it to the desired register.
-
-            // Back-schedule reloads and remats.
-            MachineBasicBlock::iterator InsertLoc =
-              ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat,
-                               SSorRMId, TII, MF);
-
-            TII->copyRegToReg(MBB, InsertLoc, Phys, InReg, RC, RC);
-
-            // This invalidates Phys.
-            Spills.ClobberPhysReg(Phys);
-            // Remember it's available.
-            Spills.addAvailable(SSorRMId, Phys);
-
-            // Mark is killed.
-            MachineInstr *CopyMI = prior(InsertLoc);
-            CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
-            MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
-            KillOpnd->setIsKill();
-            UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
-            DEBUG(dbgs() << '\t' << *CopyMI);
-            ++NumCopified;
-            continue;
-          }
-
-          // Back-schedule reloads and remats.
-          MachineBasicBlock::iterator InsertLoc =
-            ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat,
-                             SSorRMId, TII, MF);
-
-          if (VRM.isReMaterialized(VirtReg)) {
-            ReMaterialize(MBB, InsertLoc, Phys, VirtReg, TII, TRI, VRM);
-          } else {
-            const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-            TII->loadRegFromStackSlot(MBB, InsertLoc, Phys, SSorRMId, RC);
-            MachineInstr *LoadMI = prior(InsertLoc);
-            VRM.addSpillSlotUse(SSorRMId, LoadMI);
-            ++NumLoads;
-            DistanceMap.insert(std::make_pair(LoadMI, Dist++));
-          }
-
-          // This invalidates Phys.
-          Spills.ClobberPhysReg(Phys);
-          // Remember it's available.
-          Spills.addAvailable(SSorRMId, Phys);
-
-          UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
-          DEBUG(dbgs() << '\t' << *prior(MII));
-        }
+      unsigned SubIdx = MI.getOperand(i).getSubReg();
+      if (VRM->isAssignedReg(VirtReg)) {
+        // This virtual register was assigned a physreg!
+        unsigned Phys = VRM->getPhys(VirtReg);
+        MRI->setPhysRegUsed(Phys);
+        if (MI.getOperand(i).isDef())
+          ReusedOperands.markClobbered(Phys);
+        substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+        if (VRM->isImplicitlyDefined(VirtReg))
+          // FIXME: Is this needed?
+          BuildMI(*MBB, &MI, MI.getDebugLoc(),
+                  TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+        continue;
       }
 
-      // Insert spills here if asked to.
-      if (VRM.isSpillPt(&MI)) {
-        std::vector<std::pair<unsigned,bool> > &SpillRegs =
-          VRM.getSpillPtSpills(&MI);
-        for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
-          unsigned VirtReg = SpillRegs[i].first;
-          bool isKill = SpillRegs[i].second;
-          if (!VRM.getPreSplitReg(VirtReg))
-            continue; // Split interval spilled again.
-          const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
-          unsigned Phys = VRM.getPhys(VirtReg);
-          int StackSlot = VRM.getStackSlot(VirtReg);
-          MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
-          TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC);
-          MachineInstr *StoreMI = prior(oldNextMII);
-          VRM.addSpillSlotUse(StackSlot, StoreMI);
-          DEBUG(dbgs() << "Store:\t" << *StoreMI);
-          VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
-        }
-        NextMII = llvm::next(MII);
+      // This virtual register is now known to be a spilled value.
+      if (!MI.getOperand(i).isUse())
+        continue;  // Handle defs in the loop below (handle use&def here though)
+
+      bool AvoidReload = MI.getOperand(i).isUndef();
+      // Check if it is defined by an implicit def. It should not be spilled.
+      // Note, this is for correctness reason. e.g.
+      // 8   %reg1024<def> = IMPLICIT_DEF
+      // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) are not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflicts with live
+      // interval of r1025. Now suppose both registers are spilled, you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and both target registers that would overlap.
+      bool DoReMat = VRM->isReMaterialized(VirtReg);
+      int SSorRMId = DoReMat
+        ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+      int ReuseSlot = SSorRMId;
+
+      // Check to see if this stack slot is available.
+      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+      // If this is a sub-register use, make sure the reuse register is in the
+      // right register class. For example, for x86 not all of the 32-bit
+      // registers have accessible sub-registers.
+      // Similarly so for EXTRACT_SUBREG. Consider this:
+      // EDI = op
+      // MOV32_mr fi#1, EDI
+      // ...
+      //       = EXTRACT_SUBREG fi#1
+      // fi#1 is available in EDI, but it cannot be reused because it's not in
+      // the right register file.
+      if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) {
+        const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+        if (!RC->contains(PhysReg))
+          PhysReg = 0;
       }
 
-      /// ReusedOperands - Keep track of operand reuse in case we need to undo
-      /// reuse.
-      ReuseInfo ReusedOperands(MI, TRI);
-      SmallVector<unsigned, 4> VirtUseOps;
-      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-        MachineOperand &MO = MI.getOperand(i);
-        if (!MO.isReg() || MO.getReg() == 0)
-          continue;   // Ignore non-register operands.
-        
-        unsigned VirtReg = MO.getReg();
-        if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
-          // Ignore physregs for spilling, but remember that it is used by this
-          // function.
-          RegInfo->setPhysRegUsed(VirtReg);
-          continue;
+      if (PhysReg && !AvoidReload) {
+        // This spilled operand might be part of a two-address operand.  If this
+        // is the case, then changing it will necessarily require changing the
+        // def part of the instruction as well.  However, in some cases, we
+        // aren't allowed to modify the reused register.  If none of these cases
+        // apply, reuse it.
+        bool CanReuse = true;
+        bool isTied = MI.isRegTiedToDefOperand(i);
+        if (isTied) {
+          // Okay, we have a two address operand.  We can reuse this physreg as
+          // long as we are allowed to clobber the value and there isn't an
+          // earlier def that has already clobbered the physreg.
+          CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+            Spills.canClobberPhysReg(PhysReg);
         }
 
-        // We want to process implicit virtual register uses first.
-        if (MO.isImplicit())
-          // If the virtual register is implicitly defined, emit a implicit_def
-          // before so scavenger knows it's "defined".
-          // FIXME: This is a horrible hack done the by register allocator to
-          // remat a definition with virtual register operand.
-          VirtUseOps.insert(VirtUseOps.begin(), i);
-        else
-          VirtUseOps.push_back(i);
-      }
-
-      // Process all of the spilled uses and all non spilled reg references.
-      SmallVector<int, 2> PotentialDeadStoreSlots;
-      KilledMIRegs.clear();
-      for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
-        unsigned i = VirtUseOps[j];
-        unsigned VirtReg = MI.getOperand(i).getReg();
-        assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-               "Not a virtual register?");
-
-        unsigned SubIdx = MI.getOperand(i).getSubReg();
-        if (VRM.isAssignedReg(VirtReg)) {
-          // This virtual register was assigned a physreg!
-          unsigned Phys = VRM.getPhys(VirtReg);
-          RegInfo->setPhysRegUsed(Phys);
-          if (MI.getOperand(i).isDef())
-            ReusedOperands.markClobbered(Phys);
-          substitutePhysReg(MI.getOperand(i), Phys, *TRI);
-          if (VRM.isImplicitlyDefined(VirtReg))
-            // FIXME: Is this needed?
-            BuildMI(MBB, &MI, MI.getDebugLoc(),
-                    TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
-          continue;
-        }
-
-        // This virtual register is now known to be a spilled value.
-        if (!MI.getOperand(i).isUse())
-          continue;  // Handle defs in the loop below (handle use&def here though)
-
-        bool AvoidReload = MI.getOperand(i).isUndef();
-        // Check if it is defined by an implicit def. It should not be spilled.
-        // Note, this is for correctness reason. e.g.
-        // 8   %reg1024<def> = IMPLICIT_DEF
-        // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
-        // The live range [12, 14) are not part of the r1024 live interval since
-        // it's defined by an implicit def. It will not conflicts with live
-        // interval of r1025. Now suppose both registers are spilled, you can
-        // easily see a situation where both registers are reloaded before
-        // the INSERT_SUBREG and both target registers that would overlap.
-        bool DoReMat = VRM.isReMaterialized(VirtReg);
-        int SSorRMId = DoReMat
-          ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
-        int ReuseSlot = SSorRMId;
-
-        // Check to see if this stack slot is available.
-        unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-
-        // If this is a sub-register use, make sure the reuse register is in the
-        // right register class. For example, for x86 not all of the 32-bit
-        // registers have accessible sub-registers.
-        // Similarly so for EXTRACT_SUBREG. Consider this:
-        // EDI = op
-        // MOV32_mr fi#1, EDI
-        // ...
-        //       = EXTRACT_SUBREG fi#1
-        // fi#1 is available in EDI, but it cannot be reused because it's not in
-        // the right register file.
-        if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) {
-          const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-          if (!RC->contains(PhysReg))
-            PhysReg = 0;
-        }
-
-        if (PhysReg && !AvoidReload) {
-          // This spilled operand might be part of a two-address operand.  If this
-          // is the case, then changing it will necessarily require changing the 
-          // def part of the instruction as well.  However, in some cases, we
-          // aren't allowed to modify the reused register.  If none of these cases
-          // apply, reuse it.
-          bool CanReuse = true;
-          bool isTied = MI.isRegTiedToDefOperand(i);
-          if (isTied) {
-            // Okay, we have a two address operand.  We can reuse this physreg as
-            // long as we are allowed to clobber the value and there isn't an
-            // earlier def that has already clobbered the physreg.
-            CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
-              Spills.canClobberPhysReg(PhysReg);
-          }
-          
-          if (CanReuse) {
-            // If this stack slot value is already available, reuse it!
-            if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
-              DEBUG(dbgs() << "Reusing RM#"
-                           << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
-            else
-              DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
-            DEBUG(dbgs() << " from physreg "
-                         << TRI->getName(PhysReg) << " for vreg"
-                         << VirtReg <<" instead of reloading into physreg "
-                         << TRI->getName(VRM.getPhys(VirtReg)) << '\n');
-            unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
-            MI.getOperand(i).setReg(RReg);
-            MI.getOperand(i).setSubReg(0);
-
-            // The only technical detail we have is that we don't know that
-            // PhysReg won't be clobbered by a reloaded stack slot that occurs
-            // later in the instruction.  In particular, consider 'op V1, V2'.
-            // If V1 is available in physreg R0, we would choose to reuse it
-            // here, instead of reloading it into the register the allocator
-            // indicated (say R1).  However, V2 might have to be reloaded
-            // later, and it might indicate that it needs to live in R0.  When
-            // this occurs, we need to have information available that
-            // indicates it is safe to use R1 for the reload instead of R0.
-            //
-            // To further complicate matters, we might conflict with an alias,
-            // or R0 and R1 might not be compatible with each other.  In this
-            // case, we actually insert a reload for V1 in R1, ensuring that
-            // we can get at R0 or its alias.
-            ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
-                                    VRM.getPhys(VirtReg), VirtReg);
-            if (isTied)
-              // Only mark it clobbered if this is a use&def operand.
-              ReusedOperands.markClobbered(PhysReg);
-            ++NumReused;
-
-            if (MI.getOperand(i).isKill() &&
-                ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
-
-              // The store of this spilled value is potentially dead, but we
-              // won't know for certain until we've confirmed that the re-use
-              // above is valid, which means waiting until the other operands
-              // are processed. For now we just track the spill slot, we'll
-              // remove it after the other operands are processed if valid.
-
-              PotentialDeadStoreSlots.push_back(ReuseSlot);
-            }
-
-            // Mark is isKill if it's there no other uses of the same virtual
-            // register and it's not a two-address operand. IsKill will be
-            // unset if reg is reused.
-            if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
-              MI.getOperand(i).setIsKill();
-              KilledMIRegs.insert(VirtReg);
-            }
-
-            continue;
-          }  // CanReuse
-          
-          // Otherwise we have a situation where we have a two-address instruction
-          // whose mod/ref operand needs to be reloaded.  This reload is already
-          // available in some register "PhysReg", but if we used PhysReg as the
-          // operand to our 2-addr instruction, the instruction would modify
-          // PhysReg.  This isn't cool if something later uses PhysReg and expects
-          // to get its initial value.
-          //
-          // To avoid this problem, and to avoid doing a load right after a store,
-          // we emit a copy from PhysReg into the designated register for this
-          // operand.
-          unsigned DesignatedReg = VRM.getPhys(VirtReg);
-          assert(DesignatedReg && "Must map virtreg to physreg!");
-
-          // Note that, if we reused a register for a previous operand, the
-          // register we want to reload into might not actually be
-          // available.  If this occurs, use the register indicated by the
-          // reuser.
-          if (ReusedOperands.hasReuses())
-            DesignatedReg = ReusedOperands.GetRegForReload(VirtReg,
-                                                           DesignatedReg, &MI, 
-                               Spills, MaybeDeadStores, RegKills, KillOps, VRM);
-          
-          // If the mapped designated register is actually the physreg we have
-          // incoming, we don't need to inserted a dead copy.
-          if (DesignatedReg == PhysReg) {
-            // If this stack slot value is already available, reuse it!
-            if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
-              DEBUG(dbgs() << "Reusing RM#"
-                    << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
-            else
-              DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
-            DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
-                         << " for vreg" << VirtReg
-                         << " instead of reloading into same physreg.\n");
-            unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
-            MI.getOperand(i).setReg(RReg);
-            MI.getOperand(i).setSubReg(0);
-            ReusedOperands.markClobbered(RReg);
-            ++NumReused;
-            continue;
-          }
-          
-          const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-          RegInfo->setPhysRegUsed(DesignatedReg);
-          ReusedOperands.markClobbered(DesignatedReg);
-
-          // Back-schedule reloads and remats.
-          MachineBasicBlock::iterator InsertLoc =
-            ComputeReloadLoc(&MI, MBB.begin(), PhysReg, TRI, DoReMat,
-                             SSorRMId, TII, MF);
-
-          TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
-
-          MachineInstr *CopyMI = prior(InsertLoc);
-          CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
-          UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
-          // This invalidates DesignatedReg.
-          Spills.ClobberPhysReg(DesignatedReg);
-          
-          Spills.addAvailable(ReuseSlot, DesignatedReg);
-          unsigned RReg =
-            SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+        if (CanReuse) {
+          // If this stack slot value is already available, reuse it!
+          if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+            DEBUG(dbgs() << "Reusing RM#"
+                  << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+          else
+            DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+          DEBUG(dbgs() << " from physreg "
+                << TRI->getName(PhysReg) << " for vreg"
+                << VirtReg <<" instead of reloading into physreg "
+                << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
+          unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
           MI.getOperand(i).setReg(RReg);
           MI.getOperand(i).setSubReg(0);
-          DEBUG(dbgs() << '\t' << *prior(MII));
+
+          // The only technical detail we have is that we don't know that
+          // PhysReg won't be clobbered by a reloaded stack slot that occurs
+          // later in the instruction.  In particular, consider 'op V1, V2'.
+          // If V1 is available in physreg R0, we would choose to reuse it
+          // here, instead of reloading it into the register the allocator
+          // indicated (say R1).  However, V2 might have to be reloaded
+          // later, and it might indicate that it needs to live in R0.  When
+          // this occurs, we need to have information available that
+          // indicates it is safe to use R1 for the reload instead of R0.
+          //
+          // To further complicate matters, we might conflict with an alias,
+          // or R0 and R1 might not be compatible with each other.  In this
+          // case, we actually insert a reload for V1 in R1, ensuring that
+          // we can get at R0 or its alias.
+          ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+                                  VRM->getPhys(VirtReg), VirtReg);
+          if (isTied)
+            // Only mark it clobbered if this is a use&def operand.
+            ReusedOperands.markClobbered(PhysReg);
           ++NumReused;
+
+          if (MI.getOperand(i).isKill() &&
+              ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+            // The store of this spilled value is potentially dead, but we
+            // won't know for certain until we've confirmed that the re-use
+            // above is valid, which means waiting until the other operands
+            // are processed. For now we just track the spill slot, we'll
+            // remove it after the other operands are processed if valid.
+
+            PotentialDeadStoreSlots.push_back(ReuseSlot);
+          }
+
+          // Mark is isKill if it's there no other uses of the same virtual
+          // register and it's not a two-address operand. IsKill will be
+          // unset if reg is reused.
+          if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+            MI.getOperand(i).setIsKill();
+            KilledMIRegs.insert(VirtReg);
+          }
+
           continue;
-        } // if (PhysReg)
-        
-        // Otherwise, reload it and remember that we have it.
-        PhysReg = VRM.getPhys(VirtReg);
-        assert(PhysReg && "Must map virtreg to physreg!");
+        }  // CanReuse
+
+        // Otherwise we have a situation where we have a two-address instruction
+        // whose mod/ref operand needs to be reloaded.  This reload is already
+        // available in some register "PhysReg", but if we used PhysReg as the
+        // operand to our 2-addr instruction, the instruction would modify
+        // PhysReg.  This isn't cool if something later uses PhysReg and expects
+        // to get its initial value.
+        //
+        // To avoid this problem, and to avoid doing a load right after a store,
+        // we emit a copy from PhysReg into the designated register for this
+        // operand.
+        unsigned DesignatedReg = VRM->getPhys(VirtReg);
+        assert(DesignatedReg && "Must map virtreg to physreg!");
 
         // Note that, if we reused a register for a previous operand, the
         // register we want to reload into might not actually be
         // available.  If this occurs, use the register indicated by the
         // reuser.
         if (ReusedOperands.hasReuses())
-          PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, 
-                               Spills, MaybeDeadStores, RegKills, KillOps, VRM);
-        
-        RegInfo->setPhysRegUsed(PhysReg);
-        ReusedOperands.markClobbered(PhysReg);
-        if (AvoidReload)
-          ++NumAvoided;
-        else {
-          // Back-schedule reloads and remats.
-          MachineBasicBlock::iterator InsertLoc =
-            ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, DoReMat,
-                             SSorRMId, TII, MF);
+          DesignatedReg = ReusedOperands.
+            GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
+                            MaybeDeadStores, RegKills, KillOps, *VRM);
 
-          if (DoReMat) {
-            ReMaterialize(MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, VRM);
-          } else {
-            const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-            TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SSorRMId, RC);
-            MachineInstr *LoadMI = prior(InsertLoc);
-            VRM.addSpillSlotUse(SSorRMId, LoadMI);
-            ++NumLoads;
-            DistanceMap.insert(std::make_pair(LoadMI, Dist++));
-          }
-          // This invalidates PhysReg.
-          Spills.ClobberPhysReg(PhysReg);
-
-          // Any stores to this stack slot are not dead anymore.
-          if (!DoReMat)
-            MaybeDeadStores[SSorRMId] = NULL;
-          Spills.addAvailable(SSorRMId, PhysReg);
-          // Assumes this is the last use. IsKill will be unset if reg is reused
-          // unless it's a two-address operand.
-          if (!MI.isRegTiedToDefOperand(i) &&
-              KilledMIRegs.count(VirtReg) == 0) {
-            MI.getOperand(i).setIsKill();
-            KilledMIRegs.insert(VirtReg);
-          }
-
-          UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
-          DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+        // If the mapped designated register is actually the physreg we have
+        // incoming, we don't need to inserted a dead copy.
+        if (DesignatedReg == PhysReg) {
+          // If this stack slot value is already available, reuse it!
+          if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+            DEBUG(dbgs() << "Reusing RM#"
+                  << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+          else
+            DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+          DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+                << " for vreg" << VirtReg
+                << " instead of reloading into same physreg.\n");
+          unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+          MI.getOperand(i).setReg(RReg);
+          MI.getOperand(i).setSubReg(0);
+          ReusedOperands.markClobbered(RReg);
+          ++NumReused;
+          continue;
         }
-        unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+
+        const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+        MRI->setPhysRegUsed(DesignatedReg);
+        ReusedOperands.markClobbered(DesignatedReg);
+
+        // Back-schedule reloads and remats.
+        MachineBasicBlock::iterator InsertLoc =
+          ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
+                           SSorRMId, TII, MF);
+
+        TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
+
+        MachineInstr *CopyMI = prior(InsertLoc);
+        CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+        UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+        // This invalidates DesignatedReg.
+        Spills.ClobberPhysReg(DesignatedReg);
+
+        Spills.addAvailable(ReuseSlot, DesignatedReg);
+        unsigned RReg =
+          SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
         MI.getOperand(i).setReg(RReg);
         MI.getOperand(i).setSubReg(0);
-      }
+        DEBUG(dbgs() << '\t' << *prior(MII));
+        ++NumReused;
+        continue;
+      } // if (PhysReg)
 
-      // Ok - now we can remove stores that have been confirmed dead.
-      for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
-        // This was the last use and the spilled value is still available
-        // for reuse. That means the spill was unnecessary!
-        int PDSSlot = PotentialDeadStoreSlots[j];
-        MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
-        if (DeadStore) {
-          DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
-          InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
-          VRM.RemoveMachineInstrFromMaps(DeadStore);
-          MBB.erase(DeadStore);
-          MaybeDeadStores[PDSSlot] = NULL;
-          ++NumDSE;
+        // Otherwise, reload it and remember that we have it.
+      PhysReg = VRM->getPhys(VirtReg);
+      assert(PhysReg && "Must map virtreg to physreg!");
+
+      // Note that, if we reused a register for a previous operand, the
+      // register we want to reload into might not actually be
+      // available.  If this occurs, use the register indicated by the
+      // reuser.
+      if (ReusedOperands.hasReuses())
+        PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+                    Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+
+      MRI->setPhysRegUsed(PhysReg);
+      ReusedOperands.markClobbered(PhysReg);
+      if (AvoidReload)
+        ++NumAvoided;
+      else {
+        // Back-schedule reloads and remats.
+        MachineBasicBlock::iterator InsertLoc =
+          ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, DoReMat,
+                           SSorRMId, TII, MF);
+
+        if (DoReMat) {
+          ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
+        } else {
+          const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+          TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC);
+          MachineInstr *LoadMI = prior(InsertLoc);
+          VRM->addSpillSlotUse(SSorRMId, LoadMI);
+          ++NumLoads;
+          DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+        }
+        // This invalidates PhysReg.
+        Spills.ClobberPhysReg(PhysReg);
+
+        // Any stores to this stack slot are not dead anymore.
+        if (!DoReMat)
+          MaybeDeadStores[SSorRMId] = NULL;
+        Spills.addAvailable(SSorRMId, PhysReg);
+        // Assumes this is the last use. IsKill will be unset if reg is reused
+        // unless it's a two-address operand.
+        if (!MI.isRegTiedToDefOperand(i) &&
+            KilledMIRegs.count(VirtReg) == 0) {
+          MI.getOperand(i).setIsKill();
+          KilledMIRegs.insert(VirtReg);
+        }
+
+        UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+        DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+      }
+      unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+      MI.getOperand(i).setReg(RReg);
+      MI.getOperand(i).setSubReg(0);
+    }
+
+    // Ok - now we can remove stores that have been confirmed dead.
+    for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+      // This was the last use and the spilled value is still available
+      // for reuse. That means the spill was unnecessary!
+      int PDSSlot = PotentialDeadStoreSlots[j];
+      MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+      if (DeadStore) {
+        DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+        InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+        VRM->RemoveMachineInstrFromMaps(DeadStore);
+        MBB->erase(DeadStore);
+        MaybeDeadStores[PDSSlot] = NULL;
+        ++NumDSE;
+      }
+    }
+
+
+    DEBUG(dbgs() << '\t' << MI);
+
+
+    // If we have folded references to memory operands, make sure we clear all
+    // physical registers that may contain the value of the spilled virtual
+    // register
+    SmallSet<int, 2> FoldedSS;
+    for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
+      unsigned VirtReg = I->second.first;
+      VirtRegMap::ModRef MR = I->second.second;
+      DEBUG(dbgs() << "Folded vreg: " << VirtReg << "  MR: " << MR);
+
+      // MI2VirtMap be can updated which invalidate the iterator.
+      // Increment the iterator first.
+      ++I;
+      int SS = VRM->getStackSlot(VirtReg);
+      if (SS == VirtRegMap::NO_STACK_SLOT)
+        continue;
+      FoldedSS.insert(SS);
+      DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
+
+      // If this folded instruction is just a use, check to see if it's a
+      // straight load from the virt reg slot.
+      if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+        int FrameIdx;
+        unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+        if (DestReg && FrameIdx == SS) {
+          // If this spill slot is available, turn it into a copy (or nothing)
+          // instead of leaving it as a load!
+          if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+            DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
+            if (DestReg != InReg) {
+              const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+              TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC);
+              MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
+              unsigned SubIdx = DefMO->getSubReg();
+              // Revisit the copy so we make sure to notice the effects of the
+              // operation on the destreg (either needing to RA it if it's
+              // virtual or needing to clobber any values if it's physical).
+              NextMII = &MI;
+              --NextMII;  // backtrack to the copy.
+              NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+              // Propagate the sub-register index over.
+              if (SubIdx) {
+                DefMO = NextMII->findRegisterDefOperand(DestReg);
+                DefMO->setSubReg(SubIdx);
+              }
+
+              // Mark is killed.
+              MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
+              KillOpnd->setIsKill();
+
+              BackTracked = true;
+            } else {
+              DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+              // Unset last kill since it's being reused.
+              InvalidateKill(InReg, TRI, RegKills, KillOps);
+              Spills.disallowClobberPhysReg(InReg);
+            }
+
+            InvalidateKills(MI, TRI, RegKills, KillOps);
+            VRM->RemoveMachineInstrFromMaps(&MI);
+            MBB->erase(&MI);
+            Erased = true;
+            goto ProcessNextInst;
+          }
+        } else {
+          unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+          SmallVector<MachineInstr*, 4> NewMIs;
+          if (PhysReg &&
+              TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
+            MBB->insert(MII, NewMIs[0]);
+            InvalidateKills(MI, TRI, RegKills, KillOps);
+            VRM->RemoveMachineInstrFromMaps(&MI);
+            MBB->erase(&MI);
+            Erased = true;
+            --NextMII;  // backtrack to the unfolded instruction.
+            BackTracked = true;
+            goto ProcessNextInst;
+          }
         }
       }
 
-
-      DEBUG(dbgs() << '\t' << MI);
-
-
-      // If we have folded references to memory operands, make sure we clear all
-      // physical registers that may contain the value of the spilled virtual
-      // register
-      SmallSet<int, 2> FoldedSS;
-      for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
-        unsigned VirtReg = I->second.first;
-        VirtRegMap::ModRef MR = I->second.second;
-        DEBUG(dbgs() << "Folded vreg: " << VirtReg << "  MR: " << MR);
-
-        // MI2VirtMap be can updated which invalidate the iterator.
-        // Increment the iterator first.
-        ++I;
-        int SS = VRM.getStackSlot(VirtReg);
-        if (SS == VirtRegMap::NO_STACK_SLOT)
-          continue;
-        FoldedSS.insert(SS);
-        DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
-        
-        // If this folded instruction is just a use, check to see if it's a
-        // straight load from the virt reg slot.
-        if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
-          int FrameIdx;
-          unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
-          if (DestReg && FrameIdx == SS) {
-            // If this spill slot is available, turn it into a copy (or nothing)
-            // instead of leaving it as a load!
-            if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
-              DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
-              if (DestReg != InReg) {
-                const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
-                TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
-                MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
-                unsigned SubIdx = DefMO->getSubReg();
-                // Revisit the copy so we make sure to notice the effects of the
-                // operation on the destreg (either needing to RA it if it's 
-                // virtual or needing to clobber any values if it's physical).
-                NextMII = &MI;
-                --NextMII;  // backtrack to the copy.
-                NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
-                // Propagate the sub-register index over.
-                if (SubIdx) {
-                  DefMO = NextMII->findRegisterDefOperand(DestReg);
-                  DefMO->setSubReg(SubIdx);
-                }
-
-                // Mark is killed.
-                MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
-                KillOpnd->setIsKill();
-
-                BackTracked = true;
-              } else {
-                DEBUG(dbgs() << "Removing now-noop copy: " << MI);
-                // Unset last kill since it's being reused.
-                InvalidateKill(InReg, TRI, RegKills, KillOps);
-                Spills.disallowClobberPhysReg(InReg);
-              }
-
+      // If this reference is not a use, any previous store is now dead.
+      // Otherwise, the store to this stack slot is not dead anymore.
+      MachineInstr* DeadStore = MaybeDeadStores[SS];
+      if (DeadStore) {
+        bool isDead = !(MR & VirtRegMap::isRef);
+        MachineInstr *NewStore = NULL;
+        if (MR & VirtRegMap::isModRef) {
+          unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+          SmallVector<MachineInstr*, 4> NewMIs;
+          // We can reuse this physreg as long as we are allowed to clobber
+          // the value and there isn't an earlier def that has already clobbered
+          // the physreg.
+          if (PhysReg &&
+              !ReusedOperands.isClobbered(PhysReg) &&
+              Spills.canClobberPhysReg(PhysReg) &&
+              !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
+            MachineOperand *KillOpnd =
+              DeadStore->findRegisterUseOperand(PhysReg, true);
+            // Note, if the store is storing a sub-register, it's possible the
+            // super-register is needed below.
+            if (KillOpnd && !KillOpnd->getSubReg() &&
+                TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
+              MBB->insert(MII, NewMIs[0]);
+              NewStore = NewMIs[1];
+              MBB->insert(MII, NewStore);
+              VRM->addSpillSlotUse(SS, NewStore);
               InvalidateKills(MI, TRI, RegKills, KillOps);
-              VRM.RemoveMachineInstrFromMaps(&MI);
-              MBB.erase(&MI);
+              VRM->RemoveMachineInstrFromMaps(&MI);
+              MBB->erase(&MI);
               Erased = true;
-              goto ProcessNextInst;
-            }
-          } else {
-            unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
-            SmallVector<MachineInstr*, 4> NewMIs;
-            if (PhysReg &&
-                TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
-              MBB.insert(MII, NewMIs[0]);
-              InvalidateKills(MI, TRI, RegKills, KillOps);
-              VRM.RemoveMachineInstrFromMaps(&MI);
-              MBB.erase(&MI);
-              Erased = true;
+              --NextMII;
               --NextMII;  // backtrack to the unfolded instruction.
               BackTracked = true;
+              isDead = true;
+              ++NumSUnfold;
+            }
+          }
+        }
+
+        if (isDead) {  // Previous store is dead.
+          // If we get here, the store is dead, nuke it now.
+          DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+          InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+          VRM->RemoveMachineInstrFromMaps(DeadStore);
+          MBB->erase(DeadStore);
+          if (!NewStore)
+            ++NumDSE;
+        }
+
+        MaybeDeadStores[SS] = NULL;
+        if (NewStore) {
+          // Treat this store as a spill merged into a copy. That makes the
+          // stack slot value available.
+          VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
+          goto ProcessNextInst;
+        }
+      }
+
+      // If the spill slot value is available, and this is a new definition of
+      // the value, the value is not available anymore.
+      if (MR & VirtRegMap::isMod) {
+        // Notice that the value in this stack slot has been modified.
+        Spills.ModifyStackSlotOrReMat(SS);
+
+        // If this is *just* a mod of the value, check to see if this is just a
+        // store to the spill slot (i.e. the spill got merged into the copy). If
+        // so, realize that the vreg is available now, and add the store to the
+        // MaybeDeadStore info.
+        int StackSlot;
+        if (!(MR & VirtRegMap::isRef)) {
+          if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+            assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+                   "Src hasn't been allocated yet?");
+
+            if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot,
+                                    Spills, RegKills, KillOps, TRI)) {
+              NextMII = llvm::next(MII);
+              BackTracked = true;
               goto ProcessNextInst;
             }
-          }
-        }
 
-        // If this reference is not a use, any previous store is now dead.
-        // Otherwise, the store to this stack slot is not dead anymore.
-        MachineInstr* DeadStore = MaybeDeadStores[SS];
-        if (DeadStore) {
-          bool isDead = !(MR & VirtRegMap::isRef);
-          MachineInstr *NewStore = NULL;
-          if (MR & VirtRegMap::isModRef) {
-            unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
-            SmallVector<MachineInstr*, 4> NewMIs;
-            // We can reuse this physreg as long as we are allowed to clobber
-            // the value and there isn't an earlier def that has already clobbered
-            // the physreg.
-            if (PhysReg &&
-                !ReusedOperands.isClobbered(PhysReg) &&
-                Spills.canClobberPhysReg(PhysReg) &&
-                !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
-              MachineOperand *KillOpnd =
-                DeadStore->findRegisterUseOperand(PhysReg, true);
-              // Note, if the store is storing a sub-register, it's possible the
-              // super-register is needed below.
-              if (KillOpnd && !KillOpnd->getSubReg() &&
-                  TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
-                MBB.insert(MII, NewMIs[0]);
-                NewStore = NewMIs[1];
-                MBB.insert(MII, NewStore);
-                VRM.addSpillSlotUse(SS, NewStore);
-                InvalidateKills(MI, TRI, RegKills, KillOps);
-                VRM.RemoveMachineInstrFromMaps(&MI);
-                MBB.erase(&MI);
-                Erased = true;
-                --NextMII;
-                --NextMII;  // backtrack to the unfolded instruction.
-                BackTracked = true;
-                isDead = true;
-                ++NumSUnfold;
-              }
-            }
-          }
+            // Okay, this is certainly a store of SrcReg to [StackSlot].  Mark
+            // this as a potentially dead store in case there is a subsequent
+            // store into the stack slot without a read from it.
+            MaybeDeadStores[StackSlot] = &MI;
 
-          if (isDead) {  // Previous store is dead.
-            // If we get here, the store is dead, nuke it now.
-            DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
-            InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
-            VRM.RemoveMachineInstrFromMaps(DeadStore);
-            MBB.erase(DeadStore);
-            if (!NewStore)
-              ++NumDSE;
-          }
-
-          MaybeDeadStores[SS] = NULL;
-          if (NewStore) {
-            // Treat this store as a spill merged into a copy. That makes the
-            // stack slot value available.
-            VRM.virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
-            goto ProcessNextInst;
-          }
-        }
-
-        // If the spill slot value is available, and this is a new definition of
-        // the value, the value is not available anymore.
-        if (MR & VirtRegMap::isMod) {
-          // Notice that the value in this stack slot has been modified.
-          Spills.ModifyStackSlotOrReMat(SS);
-          
-          // If this is *just* a mod of the value, check to see if this is just a
-          // store to the spill slot (i.e. the spill got merged into the copy). If
-          // so, realize that the vreg is available now, and add the store to the
-          // MaybeDeadStore info.
-          int StackSlot;
-          if (!(MR & VirtRegMap::isRef)) {
-            if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
-              assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
-                     "Src hasn't been allocated yet?");
-
-              if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
-                                      Spills, RegKills, KillOps, TRI, VRM)) {
-                NextMII = llvm::next(MII);
-                BackTracked = true;
-                goto ProcessNextInst;
-              }
-
-              // Okay, this is certainly a store of SrcReg to [StackSlot].  Mark
-              // this as a potentially dead store in case there is a subsequent
-              // store into the stack slot without a read from it.
-              MaybeDeadStores[StackSlot] = &MI;
-
-              // If the stack slot value was previously available in some other
-              // register, change it now.  Otherwise, make the register
-              // available in PhysReg.
-              Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
-            }
+            // If the stack slot value was previously available in some other
+            // register, change it now.  Otherwise, make the register
+            // available in PhysReg.
+            Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
           }
         }
       }
-
-      // Process all of the spilled defs.
-      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-        MachineOperand &MO = MI.getOperand(i);
-        if (!(MO.isReg() && MO.getReg() && MO.isDef()))
-          continue;
-
-        unsigned VirtReg = MO.getReg();
-        if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
-          // Check to see if this is a noop copy.  If so, eliminate the
-          // instruction before considering the dest reg to be changed.
-          // Also check if it's copying from an "undef", if so, we can't
-          // eliminate this or else the undef marker is lost and it will
-          // confuses the scavenger. This is extremely rare.
-          unsigned Src, Dst, SrcSR, DstSR;
-          if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
-              !MI.findRegisterUseOperand(Src)->isUndef()) {
-            ++NumDCE;
-            DEBUG(dbgs() << "Removing now-noop copy: " << MI);
-            SmallVector<unsigned, 2> KillRegs;
-            InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
-            if (MO.isDead() && !KillRegs.empty()) {
-              // Source register or an implicit super/sub-register use is killed.
-              assert(KillRegs[0] == Dst ||
-                     TRI->isSubRegister(KillRegs[0], Dst) ||
-                     TRI->isSuperRegister(KillRegs[0], Dst));
-              // Last def is now dead.
-              TransferDeadness(&MBB, Dist, Src, RegKills, KillOps, VRM);
-            }
-            VRM.RemoveMachineInstrFromMaps(&MI);
-            MBB.erase(&MI);
-            Erased = true;
-            Spills.disallowClobberPhysReg(VirtReg);
-            goto ProcessNextInst;
-          }
-
-          // If it's not a no-op copy, it clobbers the value in the destreg.
-          Spills.ClobberPhysReg(VirtReg);
-          ReusedOperands.markClobbered(VirtReg);
-   
-          // Check to see if this instruction is a load from a stack slot into
-          // a register.  If so, this provides the stack slot value in the reg.
-          int FrameIdx;
-          if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
-            assert(DestReg == VirtReg && "Unknown load situation!");
-
-            // If it is a folded reference, then it's not safe to clobber.
-            bool Folded = FoldedSS.count(FrameIdx);
-            // Otherwise, if it wasn't available, remember that it is now!
-            Spills.addAvailable(FrameIdx, DestReg, !Folded);
-            goto ProcessNextInst;
-          }
-              
-          continue;
-        }
-
-        unsigned SubIdx = MO.getSubReg();
-        bool DoReMat = VRM.isReMaterialized(VirtReg);
-        if (DoReMat)
-          ReMatDefs.insert(&MI);
-
-        // The only vregs left are stack slot definitions.
-        int StackSlot = VRM.getStackSlot(VirtReg);
-        const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
-
-        // If this def is part of a two-address operand, make sure to execute
-        // the store from the correct physical register.
-        unsigned PhysReg;
-        unsigned TiedOp;
-        if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
-          PhysReg = MI.getOperand(TiedOp).getReg();
-          if (SubIdx) {
-            unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
-            assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
-                   "Can't find corresponding super-register!");
-            PhysReg = SuperReg;
-          }
-        } else {
-          PhysReg = VRM.getPhys(VirtReg);
-          if (ReusedOperands.isClobbered(PhysReg)) {
-            // Another def has taken the assigned physreg. It must have been a
-            // use&def which got it due to reuse. Undo the reuse!
-            PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, 
-                               Spills, MaybeDeadStores, RegKills, KillOps, VRM);
-          }
-        }
-
-        assert(PhysReg && "VR not assigned a physical register?");
-        RegInfo->setPhysRegUsed(PhysReg);
-        unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
-        ReusedOperands.markClobbered(RReg);
-        MI.getOperand(i).setReg(RReg);
-        MI.getOperand(i).setSubReg(0);
-
-        if (!MO.isDead()) {
-          MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
-          SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true,
-                            LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM);
-          NextMII = llvm::next(MII);
-
-          // Check to see if this is a noop copy.  If so, eliminate the
-          // instruction before considering the dest reg to be changed.
-          {
-            unsigned Src, Dst, SrcSR, DstSR;
-            if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
-              ++NumDCE;
-              DEBUG(dbgs() << "Removing now-noop copy: " << MI);
-              InvalidateKills(MI, TRI, RegKills, KillOps);
-              VRM.RemoveMachineInstrFromMaps(&MI);
-              MBB.erase(&MI);
-              Erased = true;
-              UpdateKills(*LastStore, TRI, RegKills, KillOps);
-              goto ProcessNextInst;
-            }
-          }
-        }    
-      }
-    ProcessNextInst:
-      // Delete dead instructions without side effects.
-      if (!Erased && !BackTracked && isSafeToDelete(MI)) {
-        InvalidateKills(MI, TRI, RegKills, KillOps);
-        VRM.RemoveMachineInstrFromMaps(&MI);
-        MBB.erase(&MI);
-        Erased = true;
-      }
-      if (!Erased)
-        DistanceMap.insert(std::make_pair(&MI, Dist++));
-      if (!Erased && !BackTracked) {
-        for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
-          UpdateKills(*II, TRI, RegKills, KillOps);
-      }
-      MII = NextMII;
     }
 
-  }
+    // Process all of the spilled defs.
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (!(MO.isReg() && MO.getReg() && MO.isDef()))
+        continue;
 
-};
+      unsigned VirtReg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+        // Check to see if this is a noop copy.  If so, eliminate the
+        // instruction before considering the dest reg to be changed.
+        // Also check if it's copying from an "undef", if so, we can't
+        // eliminate this or else the undef marker is lost and it will
+        // confuses the scavenger. This is extremely rare.
+        unsigned Src, Dst, SrcSR, DstSR;
+        if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
+            !MI.findRegisterUseOperand(Src)->isUndef()) {
+          ++NumDCE;
+          DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+          SmallVector<unsigned, 2> KillRegs;
+          InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+          if (MO.isDead() && !KillRegs.empty()) {
+            // Source register or an implicit super/sub-register use is killed.
+            assert(KillRegs[0] == Dst ||
+                   TRI->isSubRegister(KillRegs[0], Dst) ||
+                   TRI->isSuperRegister(KillRegs[0], Dst));
+            // Last def is now dead.
+            TransferDeadness(Src, RegKills, KillOps);
+          }
+          VRM->RemoveMachineInstrFromMaps(&MI);
+          MBB->erase(&MI);
+          Erased = true;
+          Spills.disallowClobberPhysReg(VirtReg);
+          goto ProcessNextInst;
+        }
+
+        // If it's not a no-op copy, it clobbers the value in the destreg.
+        Spills.ClobberPhysReg(VirtReg);
+        ReusedOperands.markClobbered(VirtReg);
+
+        // Check to see if this instruction is a load from a stack slot into
+        // a register.  If so, this provides the stack slot value in the reg.
+        int FrameIdx;
+        if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+          assert(DestReg == VirtReg && "Unknown load situation!");
+
+          // If it is a folded reference, then it's not safe to clobber.
+          bool Folded = FoldedSS.count(FrameIdx);
+          // Otherwise, if it wasn't available, remember that it is now!
+          Spills.addAvailable(FrameIdx, DestReg, !Folded);
+          goto ProcessNextInst;
+        }
+
+        continue;
+      }
+
+      unsigned SubIdx = MO.getSubReg();
+      bool DoReMat = VRM->isReMaterialized(VirtReg);
+      if (DoReMat)
+        ReMatDefs.insert(&MI);
+
+      // The only vregs left are stack slot definitions.
+      int StackSlot = VRM->getStackSlot(VirtReg);
+      const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+      // If this def is part of a two-address operand, make sure to execute
+      // the store from the correct physical register.
+      unsigned PhysReg;
+      unsigned TiedOp;
+      if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
+        PhysReg = MI.getOperand(TiedOp).getReg();
+        if (SubIdx) {
+          unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
+          assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
+                 "Can't find corresponding super-register!");
+          PhysReg = SuperReg;
+        }
+      } else {
+        PhysReg = VRM->getPhys(VirtReg);
+        if (ReusedOperands.isClobbered(PhysReg)) {
+          // Another def has taken the assigned physreg. It must have been a
+          // use&def which got it due to reuse. Undo the reuse!
+          PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+                      Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+        }
+      }
+
+      assert(PhysReg && "VR not assigned a physical register?");
+      MRI->setPhysRegUsed(PhysReg);
+      unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+      ReusedOperands.markClobbered(RReg);
+      MI.getOperand(i).setReg(RReg);
+      MI.getOperand(i).setSubReg(0);
+
+      if (!MO.isDead()) {
+        MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+        SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
+          LastStore, Spills, ReMatDefs, RegKills, KillOps);
+        NextMII = llvm::next(MII);
+
+        // Check to see if this is a noop copy.  If so, eliminate the
+        // instruction before considering the dest reg to be changed.
+        {
+          unsigned Src, Dst, SrcSR, DstSR;
+          if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
+            ++NumDCE;
+            DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+            InvalidateKills(MI, TRI, RegKills, KillOps);
+            VRM->RemoveMachineInstrFromMaps(&MI);
+            MBB->erase(&MI);
+            Erased = true;
+            UpdateKills(*LastStore, TRI, RegKills, KillOps);
+            goto ProcessNextInst;
+          }
+        }
+      }
+    }
+    ProcessNextInst:
+    // Delete dead instructions without side effects.
+    if (!Erased && !BackTracked && isSafeToDelete(MI)) {
+      InvalidateKills(MI, TRI, RegKills, KillOps);
+      VRM->RemoveMachineInstrFromMaps(&MI);
+      MBB->erase(&MI);
+      Erased = true;
+    }
+    if (!Erased)
+      DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
+    if (!Erased && !BackTracked) {
+      for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
+        UpdateKills(*II, TRI, RegKills, KillOps);
+    }
+    MII = NextMII;
+  }
 
 }
 
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
index 44f9df6..93474e0 100644
--- a/lib/CodeGen/VirtRegRewriter.h
+++ b/lib/CodeGen/VirtRegRewriter.h
@@ -10,11 +10,10 @@
 #ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
 #define LLVM_CODEGEN_VIRTREGREWRITER_H
 
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "VirtRegMap.h"
-
 namespace llvm {
+  class LiveIntervals;
+  class MachineFunction;
+  class VirtRegMap;
   
   /// VirtRegRewriter interface: Implementations of this interface assign
   /// spilled virtual registers to stack slots, rewriting the code.